Load libraries

Load data

Name the data you export from FileMaker Pro by their exact table names and save them as CSVs, e.g. Larvae_Morphology.csv

#setwd("~/Repos/LarvaeTransGen2018")
setwd("~/R/GitHub/LarvaeTransGen2018/data") #Elise's working directory
#upload all of the data tables for the larvae experiment
Larvae_Morphology <- read.csv("../data/Larvae_Morphology.csv") #contains data from CellProfiler from the larvae outlines
Barcode_Jar <- read.csv("../data/Barcode_Jar.csv") #contains data on CrossID, seatable, and whether or not larvae were present
Block_ID <- read.csv("../data/Block_ID.csv") #contains data on the block
Cross<- read.csv("../data/Cross.csv") #contains data on the female and male IDs for the cross and whether the cross was for QG or Meth
Fert_QG<- read.csv("../data/Fert_QG.csv") #contains data on fertilization counts, the JarIDs for the crosses
Header_WC<- read.csv("../data/Header_WC.csv") #contains data on the header tanks used to fill the jars
Larvae_Calcification <- read.csv("../data/Larvae_Calcification.csv") #contains data on filter weights and counts
Larvae_Counts <- read.csv("../data/Larvae_Counts.csv") #contains data on sedgewick rafter larvae counts
Larvae_WC <- read.csv("../data/Larvae_WC.csv") #contains data on larvae jar water chemistry
Parent_ID <- read.csv("../data/Parent_ID.csv") #contains data on the adults at the time of shucking. Includes Parent ID assignments
Larvae_Cilia<- read.csv("../data/Larvae_Cilia.csv") #contains data on cilia extrusion of the larvae photographed for morphology
Egg_Morphology<- read.csv("../data/Egg_Morphology.csv") #contains data on egg sizes from CellProfiler
Adult_Sample<- read.csv("../data/Adult_Sample.csv") #contains data on adult oysters at the time of collection
WC_Standard<- read.csv("../data/WC_Standard.csv") #contains data on the standards used for water chemistry
Tank_WC<- read.csv("../data/Tank_WC.csv") #contains data on the adult tank water chemistry

Make required fields factors:

Adult_Sample$AccTankID<- as.factor(Adult_Sample$AccTankID)
Adult_Sample$TrtTankID<- as.factor(Adult_Sample$TrtTankID)
Barcode_Jar$JarSeatable<- as.factor(Barcode_Jar$JarSeatable)
Tank_WC$ParentTrt<- as.factor(Tank_WC$ParentTrt)
Tank_WC$Tank<- as.factor(Tank_WC$Tank)
Tank_WC$Folder<- as.factor(Tank_WC$Folder)
WC_Standard$CRM<- as.factor(WC_Standard$CRM)
Larvae_WC$DateFilter<- as.factor(Larvae_WC$DateFilter)

Calculate average larval jar water chemistry based on the subset of bottles that were run on the VINDTA. May need to look to see if we want to use the mean or the median

#start by joining Barcode_Jar and Larvae_WC datasets 
JarChem<- merge(x=Barcode_Jar, y= Larvae_WC, by="JarID", all.x=TRUE)
#combine Date filter and JarTrt columns to get a unique combo value for each
JarChem<-unite(JarChem,BlockTrt, c("DateFilter","JarTrt"), sep='', remove=F)
#get only jars with larvae
JarChem<- subset(JarChem, Larvae =="TRUE")
JarChem<- subset(JarChem, DateFilter !="20180810")
boxplot(JarpHSW~BlockTrt, data=JarChem, ylim=c(6,8))

#get means for the parameters
JarMeans<- ddply(JarChem, .(BlockTrt), numcolwise(mean, na.rm=T))

#remove the unnecessary columns from JarMeans
JarMeans<- subset(JarMeans, select= c(BlockTrt,JarAlkCalc, JarpCO2, JarHCO3, JarCO3, JarCO2, JarOmegaCalcite, JarOmegaArg))   
#add Est for "estimated" to the column names
colnames(JarMeans)<- paste("Est", colnames(JarMeans), sep="")

#add columns to JarChem for mean carb values
JarChem<- merge(x=JarChem, y=JarMeans, by.x="BlockTrt", by.y="EstBlockTrt", all.x=TRUE)

#use ifelse statement to fill in the na values in the original carb fields
JarChem$JarpCO2<- ifelse((is.na(JarChem$JarpCO2)), paste(JarChem$EstJarpCO2), JarChem$JarpCO2)
JarChem$JarAlkCalc<- ifelse((is.na(JarChem$JarAlkCalc)), paste(JarChem$EstJarAlkCalc), JarChem$JarAlkCalc)
JarChem$JarHCO3<- ifelse((is.na(JarChem$JarHCO3)), paste(JarChem$EstJarHCO3), JarChem$JarHCO3)
JarChem$JarCO3<- ifelse((is.na(JarChem$JarCO3)), paste(JarChem$EstJarCO3), JarChem$JarCO3)
JarChem$JarCO2<- ifelse((is.na(JarChem$JarCO2)), paste(JarChem$EstJarCO2), JarChem$JarCO2)
JarChem$JarOmegaCalcite<- ifelse((is.na(JarChem$JarOmegaCalcite)), paste(JarChem$EstJarOmegaCalcite), JarChem$JarOmegaCalcite)
JarChem$JarOmegaArg<- ifelse((is.na(JarChem$JarOmegaArg)), paste(JarChem$EstJarOmegaArg), JarChem$JarOmegaArg)

#get the carb parameters to numeric
JarChem$JarpCO2<- as.numeric(JarChem$JarpCO2)
JarChem$JarAlkCalc<- as.numeric(JarChem$JarAlkCalc)
JarChem$JarHCO3<- as.numeric(JarChem$JarHCO3)
JarChem$JarCO3<- as.numeric(JarChem$JarCO3)
JarChem$JarCO2<- as.numeric(JarChem$JarCO2)
JarChem$JarOmegaCalcite<- as.numeric(JarChem$JarOmegaCalcite)
JarChem$JarOmegaArg<- as.numeric(JarChem$JarOmegaArg)
#good to go!

Look at the water chemistry data for adult tanks

#subset the tank data to only get blocks 1 and 2 
Tank_WCB1B2<- subset(Tank_WC, Block != "B3")
#test to see if tanks are significantly different. 
#start by plotting the pCO2 for each tank
boxplot(WCa_pHDIC~Tank, data=Tank_WCB1B2, na.omit=TRUE)

Tank_WCB1B2sub<- subset(Tank_WCB1B2, WCa_pHDIC !="NA")

#check for significant differences in water chemistry for blocks 1 & 2
#pCO2 differences
Ca1<- lm(WCa_pHDIC~ParentTrt*Block*Tank, data=Tank_WCB1B2sub)

#check assumptions
par(mfcol=c(2,2))
plot(Ca1) #there is more variance at the high pCO2 treatment. 

par(mfcol=c(1,1))
acf(Ca1)
## Error in complete.cases(object): not all arguments have the same length
Ca2<- lm(WCa_pHDIC~ParentTrt*Block+Tank, data=Tank_WCB1B2sub)
anova(Ca1, Ca2)#choose Ca2
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ ParentTrt * Block * Tank
## Model 2: WCa_pHDIC ~ ParentTrt * Block + Tank
##   Res.Df    RSS Df Sum of Sq F Pr(>F)
## 1     55 1.5448                      
## 2     55 1.5448  0         0
Ca3<- lm(WCa_pHDIC~ParentTrt+Block*Tank, data=Tank_WCB1B2sub)
anova(Ca2, Ca3) #unclear which is better
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ ParentTrt * Block + Tank
## Model 2: WCa_pHDIC ~ ParentTrt + Block * Tank
##   Res.Df    RSS Df Sum of Sq F Pr(>F)
## 1     55 1.5448                      
## 2     55 1.5448  0         0
Ca4<- lm(WCa_pHDIC~ParentTrt*Tank+Block, data=Tank_WCB1B2sub)
anova(Ca2, Ca4) #unclear which is better
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ ParentTrt * Block + Tank
## Model 2: WCa_pHDIC ~ ParentTrt * Tank + Block
##   Res.Df    RSS Df  Sum of Sq F Pr(>F)
## 1     55 1.5448                       
## 2     55 1.5448  0 1.1102e-15
anova(Ca3, Ca4) #unclear which is better
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ ParentTrt + Block * Tank
## Model 2: WCa_pHDIC ~ ParentTrt * Tank + Block
##   Res.Df    RSS Df  Sum of Sq F Pr(>F)
## 1     55 1.5448                       
## 2     55 1.5448  0 1.1102e-15
Ca5<- lm(WCa_pHDIC~ParentTrt+Block+Tank, data=Tank_WCB1B2sub)
anova(Ca2, Ca5) #choose Ca5
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ ParentTrt * Block + Tank
## Model 2: WCa_pHDIC ~ ParentTrt + Block + Tank
##   Res.Df    RSS Df Sum of Sq F Pr(>F)
## 1     55 1.5448                      
## 2     55 1.5448  0         0
anova(Ca3, Ca5) #choose Ca5
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ ParentTrt + Block * Tank
## Model 2: WCa_pHDIC ~ ParentTrt + Block + Tank
##   Res.Df    RSS Df Sum of Sq F Pr(>F)
## 1     55 1.5448                      
## 2     55 1.5448  0         0
anova(Ca4, Ca5) #choose Ca5
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ ParentTrt * Tank + Block
## Model 2: WCa_pHDIC ~ ParentTrt + Block + Tank
##   Res.Df    RSS Df   Sum of Sq F Pr(>F)
## 1     55 1.5448                        
## 2     55 1.5448  0 -1.1102e-15
Ca6<- lm(WCa_pHDIC~ParentTrt*Block, data=Tank_WCB1B2sub)
anova(Ca5, Ca6) #choose Ca6
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ ParentTrt + Block + Tank
## Model 2: WCa_pHDIC ~ ParentTrt * Block
##   Res.Df    RSS  Df Sum of Sq      F Pr(>F)
## 1     55 1.5448                            
## 2     67 1.7602 -12  -0.21535 0.6389 0.7998
Ca7<- lm(WCa_pHDIC~ParentTrt+Block, data=Tank_WCB1B2sub)
anova(Ca6, Ca7) #Choose Ca7
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ ParentTrt * Block
## Model 2: WCa_pHDIC ~ ParentTrt + Block
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     67 1.7602                           
## 2     68 1.8280 -1 -0.067855 2.5829 0.1127
Ca8<- lm(WCa_pHDIC~Block*Tank, data=Tank_WCB1B2sub)
anova(Ca7, Ca8) #choose Ca7
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ ParentTrt + Block
## Model 2: WCa_pHDIC ~ Block * Tank
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     68 1.8280                           
## 2     55 1.5448 13   0.28321 0.7756 0.6815
Ca9<- lm(WCa_pHDIC~Block+Tank, data=Tank_WCB1B2sub)
anova(Ca7, Ca9) #unclear which to choose
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ ParentTrt + Block
## Model 2: WCa_pHDIC ~ Block + Tank
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     68 1.8280                           
## 2     55 1.5448 13   0.28321 0.7756 0.6815
Ca10<- lm(WCa_pHDIC~ParentTrt*Tank, data=Tank_WCB1B2sub)
anova(Ca7, Ca10) #choose Ca7
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ ParentTrt + Block
## Model 2: WCa_pHDIC ~ ParentTrt * Tank
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     68 1.8280                           
## 2     55 1.5448 13   0.28321 0.7756 0.6815
anova(Ca9, Ca10)#choose Ca9
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ Block + Tank
## Model 2: WCa_pHDIC ~ ParentTrt * Tank
##   Res.Df    RSS Df  Sum of Sq F Pr(>F)
## 1     55 1.5448                       
## 2     55 1.5448  0 2.2205e-15
Ca11<- lm(WCa_pHDIC~ParentTrt+Tank, data=Tank_WCB1B2sub)
anova(Ca7, Ca11) #unclear which to choose
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ ParentTrt + Block
## Model 2: WCa_pHDIC ~ ParentTrt + Tank
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     68 1.8280                           
## 2     55 1.5448 13   0.28321 0.7756 0.6815
anova(Ca9, Ca11) #unclear which to choose
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ Block + Tank
## Model 2: WCa_pHDIC ~ ParentTrt + Tank
##   Res.Df    RSS Df  Sum of Sq F Pr(>F)
## 1     55 1.5448                       
## 2     55 1.5448  0 2.2205e-15
Ca12<- lm(WCa_pHDIC~ParentTrt, data=Tank_WCB1B2sub)
anova(Ca7, Ca12) #choose Ca12
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ ParentTrt + Block
## Model 2: WCa_pHDIC ~ ParentTrt
##   Res.Df    RSS Df Sum of Sq     F   Pr(>F)   
## 1     68 1.8280                               
## 2     69 2.1361 -1  -0.30807 11.46 0.001185 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(Ca9, Ca12) #choose Ca12
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ Block + Tank
## Model 2: WCa_pHDIC ~ ParentTrt
##   Res.Df    RSS  Df Sum of Sq      F Pr(>F)
## 1     55 1.5448                            
## 2     69 2.1361 -14  -0.59127 1.5037 0.1408
anova(Ca11, Ca12)#choose Ca12
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ ParentTrt + Tank
## Model 2: WCa_pHDIC ~ ParentTrt
##   Res.Df    RSS  Df Sum of Sq      F Pr(>F)
## 1     55 1.5448                            
## 2     69 2.1361 -14  -0.59127 1.5037 0.1408
Ca13<- lm(WCa_pHDIC~Block, data=Tank_WCB1B2sub)
anova(Ca12, Ca13) 
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ ParentTrt
## Model 2: WCa_pHDIC ~ Block
##   Res.Df     RSS Df Sum of Sq F Pr(>F)
## 1     69  2.1361                      
## 2     69 17.0800  0   -14.944
Ca14<- lm(WCa_pHDIC~Tank, data=Tank_WCB1B2sub)
anova(Ca12, Ca14) 
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ ParentTrt
## Model 2: WCa_pHDIC ~ Tank
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     69 2.1361                           
## 2     55 1.5448 14   0.59127 1.5037 0.1408
anova(Ca13, Ca14)# select Ca14
## Analysis of Variance Table
## 
## Model 1: WCa_pHDIC ~ Block
## Model 2: WCa_pHDIC ~ Tank
##   Res.Df     RSS Df Sum of Sq      F    Pr(>F)    
## 1     69 17.0800                                  
## 2     55  1.5448 14    15.535 39.507 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#check AIC to see what the final model should be
AIC(Ca12) #lowest AIC
## [1] -41.27382
AIC(Ca13)
## [1] 106.3303
AIC(Ca14)
## [1] -36.28299
par(mfcol=c(2,2))
plot(Ca12) #does this look okay to you, Katie? 

par(mfcol=c(1,1))
acf(Ca12)
## Error in complete.cases(object): not all arguments have the same length
summary(Ca12)
## 
## Call:
## lm(formula = WCa_pHDIC ~ ParentTrt, data = Tank_WCB1B2sub)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.49577 -0.07996 -0.02372  0.09681  0.58061 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.30802    0.02932   44.60   <2e-16 ***
## ParentTrt2800 -0.92881    0.04177  -22.24   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1759 on 69 degrees of freedom
## Multiple R-squared:  0.8776, Adjusted R-squared:  0.8758 
## F-statistic: 494.5 on 1 and 69 DF,  p-value: < 2.2e-16
#There is a significant effect of ParentTrt, but not tank or block according to best model (lowest AIC). So I would feel confident just using this as a factor. 

#create dataframe that has the mean values for each tank
TankMeans<- ddply(Tank_WCB1B2, .(Tank, Block, ParentTrt), numcolwise(mean, na.rm=T))

Join the data into dataframes for analysis.

#First dataframe will be for examining egg morphology for all eggs traced; multiple values for each adult
ParentInfo<- merge(x=Adult_Sample, y=Parent_ID, by="AdultID", all.x=TRUE)
Eggs<- merge(x=Egg_Morphology, y=ParentInfo, by.x="FemaleID", by.y="ParentBlockID", all.x=TRUE)
#add the tank data
Eggs<- merge(x=Eggs, y=TankMeans, by.x="TrtTankID", by.y= "Tank", all.x=TRUE)
#subset the data to exclude Block 3
EggsB1B2<- subset(Eggs, Block !="B3") # in the end we may decide to also exclude Block 2. 
EggsYoutliers<- subset(Eggs, Block!="B3") #dataset with the outliers remaining

Visualize the data for eggs to start. Again, using only blocks 1 & 2

#There were some concerns at first that the eggs that were measured were too large. Elise looked into this in April 2020. The literature lists mature eggs for C. virginica as ~60 um in diameter. Check this against our mean eggs

meanegg<- aggregate(EggDiamum~Block, data= EggsB1B2, FUN=mean)
meanegg # these means are in agreement with the average size of a mature egg. 
##   Block EggDiamum
## 1    B1  62.40381
## 2    B2  63.37102
#now look at the data to find outliers. 
#look at the egg measurements for each female
boxplot(EggDiamum~FemaleID, data=EggsB1B2) #CF06_B2 seems to have eggs that are smaller than everyone else

boxplot(EggDiamum~Block*ParentTrt, data=EggsB1B2)

#check for outliers using code from https://www.r-bloggers.com/identify-describe-plot-and-remove-the-outliers-from-the-dataset/. The function uses Tukey's method to ID ouliers ranged above and below the 1.5*IQR. 
outlierKD <- function(dt, var) {
     var_name <- eval(substitute(var),eval(dt))
     na1 <- sum(is.na(var_name))
     m1 <- mean(var_name, na.rm = T)
     par(mfrow=c(2, 2), oma=c(0,0,3,0))
     boxplot(var_name, main="With outliers")
     hist(var_name, main="With outliers", xlab=NA, ylab=NA)
     outlier <- boxplot.stats(var_name)$out
     mo <- mean(outlier)
     var_name <- ifelse(var_name %in% outlier, NA, var_name)
     boxplot(var_name, main="Without outliers")
     hist(var_name, main="Without outliers", xlab=NA, ylab=NA)
     title("Outlier Check", outer=TRUE)
     na2 <- sum(is.na(var_name))
     cat("Outliers identified:", na2 - na1, "n")
     cat("Propotion (%) of outliers:", round((na2 - na1) / sum(!is.na(var_name))*100, 1), "n")
     cat("Mean of the outliers:", round(mo, 2), "n")
     m2 <- mean(var_name, na.rm = T)
     cat("Mean without removing outliers:", round(m1, 2), "n")
     cat("Mean if we remove outliers:", round(m2, 2), "n")
     response <- readline(prompt="Do you want to remove outliers and to replace with NA? [yes/no]: ")
     if(response == "y" | response == "yes"){
          dt[as.character(substitute(var))] <- invisible(var_name)
          assign(as.character(as.list(match.call())$dt), dt, envir = .GlobalEnv)
          cat("Outliers successfully removed", "n")
          return(invisible(dt))
     } else{
          dt[as.character(substitute(var))] <- invisible(var_name)
          assign(as.character(as.list(match.call())$dt), dt, envir = .GlobalEnv)
          cat("Outliers successfully removed", "n")
          return(invisible(dt))
     }
}

outlierKD(EggsB1B2, EggDiamum)

## Outliers identified: 90 nPropotion (%) of outliers: 10.8 nMean of the outliers: 57.85 nMean without removing outliers: 62.88 nMean if we remove outliers: 63.43 nDo you want to remove outliers and to replace with NA? [yes/no]: 
## Outliers successfully removed n
#I opted to remove outliers for now. 
#Outliers identified: 90 nPropotion (%) of outliers: 10.8 nMean of the outliers: 57.85 nMean without removing outliers: 62.88 nMean if we remove outliers: 63.43 n

#count how many eggs are left per female after removing the outliers. 
outrem<- aggregate(EggDiamum~FemaleID, data=EggsB1B2, FUN=length)
#there are three females that have 10 or fewer eggs : CF06_B2, CF08_B2, and EF07_B1

#remove females with fewer than 10 eggs
EggsB1B2sub<- subset(EggsB1B2, FemaleID != "CF06_B2" & FemaleID != "CF08_B2")

#moving forward for Egg data, I will be using EggsB1B2sub

#Plot again
par(mfcol=c(1,1))
boxplot(EggDiamum~FemaleID, data=EggsB1B2sub) 

boxplot(EggDiamum~Block*ParentTrt, data=EggsB1B2sub)

#note that the eggs were filtered through a 70 um filter, but it appears many of them that were that large or larger made it through. Katie, should we remove these as well? I don't really think we should. 
#plot histograms of the egg data with outliers removed
ggplot(EggsB1B2sub, aes(x=EggDiamum, color=ParentTrt))+
  geom_histogram(alpha=0.5, position="identity") +
  facet_grid(~Block) 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 59 rows containing non-finite values (stat_bin).

eggmeans<- aggregate(EggDiamum~ParentTrt, data=EggsB1B2sub, FUN=mean) #400 = 63.76692; 2800= 63.12195
eggmedians<- aggregate(EggDiamum~ParentTrt, data=EggsB1B2sub, FUN=median)#400 = 63.10395; 2800= 62.68700
#I will use the mean egg size. 

Make datasets without the egg size outliers

#start by removing the outliers from Egg_Morphology
Egg_MorphNout<- Egg_Morphology
outlierKD(Egg_MorphNout, EggDiamum)

## Outliers identified: 131 nPropotion (%) of outliers: 11.1 nMean of the outliers: 53.51 nMean without removing outliers: 61.97 nMean if we remove outliers: 62.9 nDo you want to remove outliers and to replace with NA? [yes/no]: 
## Outliers successfully removed n
par(mfcol=c(1,1))
meanegg<- ddply(Egg_MorphNout, .(FemaleID), numcolwise(mean, na.rm=T)) #get mean egg size
FemAdults<- merge(x=ParentInfo, y= meanegg, by.x="ParentBlockID", by.y= "FemaleID", all.y=TRUE)
#Next dataframe will be for one line per traced larva, includes all the data for that jar, including average egg size for that jar
#EggsB1B2sub dataframe has all of the info that we need for the 
Larvae<-merge(x=Larvae_Morphology, y=JarChem, by="JarID", all.x=TRUE)
#use gather function in tidyr to make the Fert_QG Jar ID columns in long form vs. wide form
Fert_QG_long <- Fert_QG %>% tidyr::gather(Key, JarID, JarID1:JarID6)
## Warning: attributes are not identical across measure variables;
## they will be dropped
#use the gathered data frame to merge with Larvae
Larvae<-merge(x=Larvae, y=Fert_QG_long, by.x= c("JarID", "CrossID"), by.y=c("JarID", "CrossID"), all.x=TRUE) #did the join by two columns so CrossID column wasn't duplicated
Larvae<- merge(x=Larvae, y=Cross, by="CrossID", all.x=TRUE)
Larvae<- merge(x=Larvae, y=FemAdults, by.x= "FemaleID", by.y="ParentBlockID", all.x=TRUE)
Larvae<- merge(x=Larvae, y= Larvae_Counts, by="JarID", all.x=TRUE)
Larvae<- merge(x=Larvae, y=TankMeans, by.x= "TrtTankID", by.y="Tank", all.x=TRUE)
#Remove jars without larvae
Larvae<- subset(Larvae, Larvae=="TRUE")
#get the growth per day for the larvae. Katie, do we have a column with the larvae age? 
Larvae$GrowthPerDay<- (Larvae$LarvaeDiamum-Larvae$EggDiamum)/3
#Final dataframe will be one line per Jar, includes all data for the jar, including the average larvae size per jar and cilia
LarvByJar<-merge(x=Larvae_Calcification, y=Larvae_Cilia, by="JarID", all.x=TRUE, all.y=TRUE)
LarvByJar<- merge(x=LarvByJar, y=JarChem, by="JarID", all.x=TRUE)
LarvByJar<- merge(x=LarvByJar, y=Fert_QG_long, by.x=c("JarID", "CrossID"), by.y=c("JarID","CrossID"), all.x=TRUE)
LarvByJar<- merge(x=LarvByJar, y=Cross, by="CrossID", all.x=TRUE)
LarvByJar<- merge(x=LarvByJar, y=FemAdults, by.x="FemaleID", by.y="ParentBlockID", all.x=TRUE)
LarvByJar<- merge(x=LarvByJar, y=Larvae_Counts, by="JarID", all.x=TRUE)
LarvByJar<- merge(x=LarvByJar, y=TankMeans, by.x= "TrtTankID", by.y="Tank", all.x=TRUE)

#get the means for larvae morphology and add to LarvByJar
MeanLarvMorph<- ddply(Larvae_Morphology, .(JarID), numcolwise(mean, na.rm=T))
SELarvMorph<- ddply(Larvae_Morphology, .(JarID), numcolwise(se, na.rm=T)) #get standard error of larvae size
MeanLarvMorph<- full_join(MeanLarvMorph,SELarvMorph, by=c("JarID", "JarID"),suffix=c("","SE")) #join the mean and se larvae data
LarvByJar<- merge(x=LarvByJar, y=MeanLarvMorph, by="JarID", all.x=TRUE)
#Now let's add the survival data. See notes on "Larvae Survival" for more info on how we decided to count total larvae in the jar

#Make columns for the two larvae counts for 1_5 and 2
LarvByJar$v1_5SurvCount<- LarvByJar$SWRTotal+LarvByJar$F2LarvaeCount
LarvByJar$v2SurvCount<- LarvByJar$F1LarvaeCount+LarvByJar$F2LarvaeCount
#use ifelse statement to define the LarvaeSurvived column
LarvByJar$LarvaeSurvived<- ifelse(LarvByJar$ProtocolVersion =="1", paste(LarvByJar$TotalLarvae), ifelse(LarvByJar$ProtocolVersion =="1_5", paste(LarvByJar$v1_5SurvCount), paste(LarvByJar$v2SurvCount)))
LarvByJar$LarvaeSurvived<- as.numeric(LarvByJar$LarvaeSurvived)
## Warning: NAs introduced by coercion
#elise checked the above code to make sure it resulted in the correct calculation and it does. 

#now get the estimate of how many larvae were originally added to the jar. 
LarvByJar$LarvaeStocked<- LarvByJar$LarvaePermL*LarvByJar$mLJarActual
LarvByJar$PropSurvived<- LarvByJar$LarvaeSurvived/LarvByJar$LarvaeStocked

#remove jars that did not have larvae: 
LarvByJar<- subset(LarvByJar, Larvae=="TRUE")
#now remove the data we do not want to analyze. 
#For the Larvae datasets, that means removing Block 3, Block 2 control jars; Larvae from Females CF06_B2 and CF08_B2
LarvaeDat<- subset(Larvae, Block == "B1")
LarvaeB2e<- subset(Larvae, Block=="B2")
LarvaeB2e<- subset(LarvaeB2e, JarTrt == "Exposed")
LarvaeDat<- rbind(LarvaeDat, LarvaeB2e)
LarvaeDat<- subset(LarvaeDat, FemaleID != "CF06_B2" & FemaleID != "CF08_B2")
LarvaeDatB1<- subset(LarvaeDat, Block=="B1")

LarvByJarDat<- subset(LarvByJar, Block == "B1")
LarvByJarB2e<- subset(LarvByJar, Block=="B2")
LarvByJarB2e<- subset(LarvByJarB2e, JarTrt == "Exposed")
LarvByJarDat<- rbind(LarvByJarDat, LarvByJarB2e)
LarvByJarDat<- subset(LarvByJarDat, FemaleID != "CF06_B2" & FemaleID != "CF08_B2")

Test for significant effect of parent treatment on egg size. Start with measured water chem

#to test the simple models, I need to remove data that don't have an associated water chem from pH and DIC, hopefully these bottles can be found and added to the dataset, but for now I will need to omit those
CalEggs<- subset(EggsB1B2sub, WCa_pHDIC != "NA")

#start with models that have the response variable EggDiamum
eggdiam1<- lmer(EggDiamum~WCa_pHDIC*AdultLength + (1|FemaleID) + (1|Block) +(1|AccTankID), data=CalEggs)

#Check assumptions
par(mfcol=c(1,1))
qqnorm(resid(eggdiam1))
qqline(resid(eggdiam1))

plot(eggdiam1)

acf(eggdiam1)
## Error in complete.cases(object): invalid 'type' (S4) of argument
summary(eggdiam1)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: 
## EggDiamum ~ WCa_pHDIC * AdultLength + (1 | FemaleID) + (1 | Block) +  
##     (1 | AccTankID)
##    Data: CalEggs
## 
## REML criterion at convergence: 4850.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6026 -0.6030 -0.0577  0.5649  3.3588 
## 
## Random effects:
##  Groups    Name        Variance  Std.Dev. 
##  FemaleID  (Intercept) 6.153e+00 2.480e+00
##  AccTankID (Intercept) 4.709e-13 6.862e-07
##  Block     (Intercept) 4.008e-02 2.002e-01
##  Residual              1.936e+01 4.400e+00
## Number of obs: 826, groups:  FemaleID, 30; AccTankID, 6; Block, 2
## 
## Fixed effects:
##                       Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)            60.7388     4.5229 25.3650  13.429 4.93e-13 ***
## WCa_pHDIC               3.5893     4.3469 24.8490   0.826    0.417    
## AdultLength             0.6196     1.4016 24.9670   0.442    0.662    
## WCa_pHDIC:AdultLength  -0.8390     1.3123 24.9140  -0.639    0.528    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) WC_pHDIC AdltLn
## WCa_pHDIC   -0.884                
## AdultLength -0.976  0.852         
## WC_pHDIC:AL  0.875 -0.970   -0.890
#try simplifying the model; step function gives me errors, so I ended up doing this manually
eggdiam2<- lmer(EggDiamum~WCa_pHDIC+ (1|FemaleID) + (1|Block) , data=CalEggs)
anova(eggdiam1, eggdiam2)#Choose eggdiam2
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggDiamum ~ WCa_pHDIC + (1 | FemaleID) + (1 | Block)
## object: EggDiamum ~ WCa_pHDIC * AdultLength + (1 | FemaleID) + (1 | Block) + 
## object:     (1 | AccTankID)
##        Df    AIC    BIC  logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1     5 4866.0 4889.5 -2428.0   4856.0                        
## object  8 4871.4 4909.1 -2427.7   4855.4 0.548      3     0.9082
eggdiam3<- lm(EggDiamum~WCa_pHDIC, data=CalEggs)
anova(eggdiam2, eggdiam3)#choose eggdiam2
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggDiamum ~ WCa_pHDIC
## object: EggDiamum ~ WCa_pHDIC + (1 | FemaleID) + (1 | Block)
##        Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## ..1     3 4996.6 5010.8 -2495.3   4990.6                             
## object  5 4866.0 4889.5 -2428.0   4856.0 134.69      2  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
eggdiam4<- lmer(EggDiamum~WCa_pHDIC + (1|FemaleID), data=CalEggs)
anova(eggdiam2, eggdiam4)#choose eggdiam4
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggDiamum ~ WCa_pHDIC + (1 | FemaleID)
## object: EggDiamum ~ WCa_pHDIC + (1 | FemaleID) + (1 | Block)
##        Df  AIC    BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1     4 4864 4882.8  -2428     4856                        
## object  5 4866 4889.5  -2428     4856     0      1          1
eggdiam5<- lmer(EggDiamum ~ 1 + (1|FemaleID), data=CalEggs)
anova(eggdiam4, eggdiam5) #choose eggdiam5
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggDiamum ~ 1 + (1 | FemaleID)
## object: EggDiamum ~ WCa_pHDIC + (1 | FemaleID)
##        Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## ..1     3 4862.7 4876.9 -2428.3   4856.7                         
## object  4 4864.0 4882.8 -2428.0   4856.0 0.7443      1     0.3883
summary(eggdiam5)
## summary from lme4 is returned
## some computational error has occurred in lmerTest
## Linear mixed model fit by REML ['lmerMod']
## Formula: EggDiamum ~ 1 + (1 | FemaleID)
##    Data: CalEggs
## 
## REML criterion at convergence: 4856.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.5945 -0.6051 -0.0562  0.5569  3.3466 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  FemaleID (Intercept)  5.722   2.392   
##  Residual             19.360   4.400   
## Number of obs: 826, groups:  FemaleID, 30
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  63.4178     0.4637   136.8
#look at assumptions of eggdiam 5
par(mfcol=c(1,1))
plot(eggdiam5)

qqnorm(resid(eggdiam5))
qqline(resid(eggdiam5))

acf(eggdiam5)
## Error in complete.cases(object): invalid 'type' (S4) of argument
eggdiam6<- lm(EggDiamum~FemaleID, data=CalEggs)
anova(eggdiam3, eggdiam6)
## Analysis of Variance Table
## 
## Model 1: EggDiamum ~ WCa_pHDIC
## Model 2: EggDiamum ~ FemaleID
##   Res.Df   RSS Df Sum of Sq      F    Pr(>F)    
## 1    824 20347                                  
## 2    796 15410 28    4936.2 9.1061 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(eggdiam6)
## 
## Call:
## lm(formula = EggDiamum ~ FemaleID, data = CalEggs)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.2388  -2.7244  -0.2563   2.4543  15.1053 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     64.85003    0.77781  83.375  < 2e-16 ***
## FemaleIDCF01_B2  6.69911    1.16172   5.767 1.16e-08 ***
## FemaleIDCF02_B1 -4.08868    1.17447  -3.481 0.000526 ***
## FemaleIDCF02_B2 -2.11343    1.12808  -1.873 0.061369 .  
## FemaleIDCF03_B1  3.99989    1.14979   3.479 0.000531 ***
## FemaleIDCF03_B2 -1.36142    1.12808  -1.207 0.227849    
## FemaleIDCF04_B1 -2.97063    1.11818  -2.657 0.008050 ** 
## FemaleIDCF04_B2 -3.67086    1.12808  -3.254 0.001186 ** 
## FemaleIDCF05_B1 -4.33730    1.13860  -3.809 0.000150 ***
## FemaleIDCF05_B2 -0.97979    1.11818  -0.876 0.381164    
## FemaleIDCF06_B1 -0.23605    1.17447  -0.201 0.840765    
## FemaleIDCF07_B1 -3.27936    1.12808  -2.907 0.003750 ** 
## FemaleIDCF07_B2 -1.80777    1.11818  -1.617 0.106337    
## FemaleIDCF08_B1 -0.32337    1.11818  -0.289 0.772506    
## FemaleIDEF01_B1 -0.70782    1.14979  -0.616 0.538327    
## FemaleIDEF01_B2 -4.63235    1.11818  -4.143 3.80e-05 ***
## FemaleIDEF02_B1 -3.09748    1.18813  -2.607 0.009304 ** 
## FemaleIDEF02_B2 -1.79542    1.11818  -1.606 0.108742    
## FemaleIDEF03_B1 -2.42848    1.14979  -2.112 0.034988 *  
## FemaleIDEF03_B2  0.07892    1.12808   0.070 0.944243    
## FemaleIDEF04_B1  0.19892    1.12808   0.176 0.860079    
## FemaleIDEF04_B2  0.22683    1.12808   0.201 0.840692    
## FemaleIDEF05_B1 -5.07044    1.12808  -4.495 8.00e-06 ***
## FemaleIDEF05_B2 -2.31488    1.11818  -2.070 0.038753 *  
## FemaleIDEF06_B1 -1.45852    1.25418  -1.163 0.245210    
## FemaleIDEF06_B2  2.48721    1.13860   2.184 0.029220 *  
## FemaleIDEF07_B1 -3.67532    1.59404  -2.306 0.021386 *  
## FemaleIDEF07_B2 -1.31073    1.13860  -1.151 0.250007    
## FemaleIDEF08_B1 -3.08252    1.12808  -2.733 0.006424 ** 
## FemaleIDEF08_B2 -2.20750    1.13860  -1.939 0.052881 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.4 on 796 degrees of freedom
##   (59 observations deleted due to missingness)
## Multiple R-squared:  0.2461, Adjusted R-squared:  0.2187 
## F-statistic: 8.962 on 29 and 796 DF,  p-value: < 2.2e-16
#look at assumptions of eggdiam 6
par(mfcol=c(2,2))
plot(eggdiam6)

par(mfcol=c(1,1))
acf(eggdiam6)
## Error in complete.cases(object): not all arguments have the same length
#FemaleID matters but not block, treatment or adult length. 

plot(EggDiamum~FemaleID, data=CalEggs) #we need to account for FemaleID, but not egg size in Larvae models

#now look at the response variable EggAreaum2
eggarea1<- lmer(EggAreaum2~WCa_pHDIC*AdultLength + (1|FemaleID) + (1|Block) +(1|AccTankID), data=CalEggs)

#Check assumptions
qqnorm(resid(eggarea1))
qqline(resid(eggarea1)) #definitely doesn't meet assumption of normality

plot(eggarea1)

acf(eggarea1)
## Error in complete.cases(object): invalid 'type' (S4) of argument
summary(eggarea1)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: EggAreaum2 ~ WCa_pHDIC * AdultLength + (1 | FemaleID) + (1 |  
##     Block) + (1 | AccTankID)
##    Data: CalEggs
## 
## REML criterion at convergence: 13252.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.0997 -0.3203  0.0693  0.4951  3.3492 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  FemaleID  (Intercept)  57841   240.50  
##  AccTankID (Intercept)      0     0.00  
##  Block     (Intercept)   2515    50.15  
##  Residual              181508   426.04  
## Number of obs: 885, groups:  FemaleID, 30; AccTankID, 6; Block, 2
## 
## Fixed effects:
##                       Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)            2349.30     438.23   25.68   5.361 1.35e-05 ***
## WCa_pHDIC               537.70     420.45   25.04   1.279    0.213    
## AdultLength             108.69     135.43   25.08   0.803    0.430    
## WCa_pHDIC:AdultLength  -114.35     126.94   25.10  -0.901    0.376    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) WC_pHDIC AdltLn
## WCa_pHDIC   -0.882                
## AdultLength -0.973  0.852         
## WC_pHDIC:AL  0.873 -0.970   -0.890
#try simplifying the model; step function gives me errors, so I ended up doing this manually
eggarea2<- lmer(EggAreaum2~WCa_pHDIC+ (1|FemaleID) + (1|Block) , data=CalEggs)
anova(eggarea1, eggarea2)#Choose eggarea2
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggAreaum2 ~ WCa_pHDIC + (1 | FemaleID) + (1 | Block)
## object: EggAreaum2 ~ WCa_pHDIC * AdultLength + (1 | FemaleID) + (1 | 
## object:     Block) + (1 | AccTankID)
##        Df   AIC   BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## ..1     5 13305 13329 -6647.6    13295                         
## object  8 13310 13349 -6647.2    13294 0.8548      3     0.8363
eggarea3<- lm(EggAreaum2~WCa_pHDIC, data=CalEggs)
anova(eggarea2, eggarea3)#choose eggarea2
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggAreaum2 ~ WCa_pHDIC
## object: EggAreaum2 ~ WCa_pHDIC + (1 | FemaleID) + (1 | Block)
##        Df   AIC   BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## ..1     3 13460 13475 -6727.1    13454                             
## object  5 13305 13329 -6647.6    13295 158.98      2  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
eggarea4<- lmer(EggAreaum2~WCa_pHDIC + (1|FemaleID), data=CalEggs)
anova(eggarea2, eggarea4)#choose eggarea4
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggAreaum2 ~ WCa_pHDIC + (1 | FemaleID)
## object: EggAreaum2 ~ WCa_pHDIC + (1 | FemaleID) + (1 | Block)
##        Df   AIC   BIC  logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1     4 13303 13322 -6647.6    13295                        
## object  5 13305 13329 -6647.6    13295     0      1          1
eggarea5<- lmer(EggAreaum2 ~ 1 + (1|FemaleID), data=CalEggs)
anova(eggarea4, eggarea5) #choose eggarea5
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggAreaum2 ~ 1 + (1 | FemaleID)
## object: EggAreaum2 ~ WCa_pHDIC + (1 | FemaleID)
##        Df   AIC   BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)  
## ..1     3 13304 13318 -6649.1    13298                           
## object  4 13303 13322 -6647.6    13295 2.9337      1    0.08675 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(eggarea5)
## summary from lme4 is returned
## some computational error has occurred in lmerTest
## Linear mixed model fit by REML ['lmerMod']
## Formula: EggAreaum2 ~ 1 + (1 | FemaleID)
##    Data: CalEggs
## 
## REML criterion at convergence: 13288.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.1290 -0.3200  0.0691  0.4909  3.3196 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  FemaleID (Intercept)  60286   245.5   
##  Residual             181520   426.1   
## Number of obs: 885, groups:  FemaleID, 30
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  2829.39      47.12   60.04
eggarea6<- lm(EggAreaum2~FemaleID, data=CalEggs)
anova(eggarea3, eggarea6)
## Analysis of Variance Table
## 
## Model 1: EggAreaum2 ~ WCa_pHDIC
## Model 2: EggAreaum2 ~ FemaleID
##   Res.Df       RSS Df Sum of Sq      F    Pr(>F)    
## 1    883 207413205                                  
## 2    855 155205895 28  52207310 10.271 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(eggarea6)
## 
## Call:
## lm(formula = EggAreaum2 ~ FemaleID, data = CalEggs)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2612.56  -130.77    23.57   203.23  1413.03 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      2991.01      75.32  39.712  < 2e-16 ***
## FemaleIDCF01_B2   618.54     108.28   5.713 1.54e-08 ***
## FemaleIDCF02_B1  -602.67     107.37  -5.613 2.69e-08 ***
## FemaleIDCF02_B2  -157.33     108.28  -1.453 0.146571    
## FemaleIDCF03_B1   260.71     108.28   2.408 0.016258 *  
## FemaleIDCF03_B2   -82.04     108.28  -0.758 0.448818    
## FemaleIDCF04_B1  -176.67     108.28  -1.632 0.103127    
## FemaleIDCF04_B2  -259.31     108.28  -2.395 0.016838 *  
## FemaleIDCF05_B1  -366.42     107.37  -3.413 0.000674 ***
## FemaleIDCF05_B2    27.31     108.28   0.252 0.800957    
## FemaleIDCF06_B1    74.55     107.37   0.694 0.487682    
## FemaleIDCF07_B1  -170.92     109.24  -1.565 0.118015    
## FemaleIDCF07_B2  -198.06     108.28  -1.829 0.067718 .  
## FemaleIDCF08_B1   -75.27     108.28  -0.695 0.487118    
## FemaleIDEF01_B1  -147.38     108.28  -1.361 0.173816    
## FemaleIDEF01_B2  -422.48     108.28  -3.902 0.000103 ***
## FemaleIDEF02_B1  -558.53     108.28  -5.158 3.10e-07 ***
## FemaleIDEF02_B2  -274.89     108.28  -2.539 0.011300 *  
## FemaleIDEF03_B1  -260.03     108.28  -2.402 0.016537 *  
## FemaleIDEF03_B2  -254.46     108.28  -2.350 0.018995 *  
## FemaleIDEF04_B1   -61.35     108.28  -0.567 0.571110    
## FemaleIDEF04_B2   -14.22     108.28  -0.131 0.895536    
## FemaleIDEF05_B1  -425.09     108.28  -3.926 9.33e-05 ***
## FemaleIDEF05_B2  -167.23     108.28  -1.544 0.122839    
## FemaleIDEF06_B1  -417.78     108.28  -3.858 0.000123 ***
## FemaleIDEF06_B2   331.26     108.28   3.059 0.002287 ** 
## FemaleIDEF07_B1  -399.81     148.91  -2.685 0.007396 ** 
## FemaleIDEF07_B2  -194.25     108.28  -1.794 0.073158 .  
## FemaleIDEF08_B1  -151.80     108.28  -1.402 0.161272    
## FemaleIDEF08_B2  -355.12     108.28  -3.280 0.001081 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 426.1 on 855 degrees of freedom
## Multiple R-squared:  0.2689, Adjusted R-squared:  0.2441 
## F-statistic: 10.85 on 29 and 855 DF,  p-value: < 2.2e-16
#look at assumptions of eggarea6
par(mfcol=c(2,2))
plot(eggarea6)

par(mfcol=c(1,1))
acf(eggarea6)
## Error in complete.cases(object): not all arguments have the same length
#FemaleID matters but not block, treatment or adult length. Same as diameter, but model doesn't seem to meet the assumption of normality

plot(EggAreaum2~FemaleID, data=CalEggs)

#look at EggEccentricity
eggeccen1<- lmer(EggEccentricity~WCa_pHDIC*AdultLength + (1|FemaleID) + (1|Block) +(1|AccTankID), data=CalEggs)

#Check assumptions
qqnorm(resid(eggeccen1))
qqline(resid(eggeccen1)) #looks good!

plot(eggeccen1) #this looks good too

acf(eggeccen1)
## Error in complete.cases(object): invalid 'type' (S4) of argument
summary(eggeccen1)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: EggEccentricity ~ WCa_pHDIC * AdultLength + (1 | FemaleID) +  
##     (1 | Block) + (1 | AccTankID)
##    Data: CalEggs
## 
## REML criterion at convergence: -934.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.64607 -0.65973 -0.06525  0.62934  2.94875 
## 
## Random effects:
##  Groups    Name        Variance  Std.Dev.
##  FemaleID  (Intercept) 0.0032377 0.05690 
##  AccTankID (Intercept) 0.0000000 0.00000 
##  Block     (Intercept) 0.0003786 0.01946 
##  Residual              0.0187144 0.13680 
## Number of obs: 885, groups:  FemaleID, 30; AccTankID, 6; Block, 2
## 
## Fixed effects:
##                       Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)            0.46654    0.10828 25.84800   4.309  0.00021 ***
## WCa_pHDIC             -0.11089    0.10339 24.99100  -1.073  0.29374    
## AdultLength           -0.01820    0.03331 25.05700  -0.546  0.58961    
## WCa_pHDIC:AdultLength  0.02071    0.03122 25.04300   0.663  0.51329    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) WC_pHDIC AdltLn
## WCa_pHDIC   -0.877                
## AdultLength -0.968  0.852         
## WC_pHDIC:AL  0.869 -0.970   -0.890
#try simplifying the model; step function gives me errors, so I ended up doing this manually
eggeccen2<- lmer(EggEccentricity~WCa_pHDIC+ (1|FemaleID) + (1|Block) , data=CalEggs)
anova(eggeccen1, eggeccen2)#Choose eggeccen2
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggEccentricity ~ WCa_pHDIC + (1 | FemaleID) + (1 | Block)
## object: EggEccentricity ~ WCa_pHDIC * AdultLength + (1 | FemaleID) + 
## object:     (1 | Block) + (1 | AccTankID)
##        Df     AIC     BIC logLik deviance  Chisq Chi Df Pr(>Chisq)
## ..1     5 -947.60 -923.67 478.80  -957.60                         
## object  8 -942.14 -903.86 479.07  -958.14 0.5406      3     0.9099
eggeccen3<- lm(EggEccentricity~WCa_pHDIC, data=CalEggs)
anova(eggeccen2, eggeccen3)#choose eggeccen2
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggEccentricity ~ WCa_pHDIC
## object: EggEccentricity ~ WCa_pHDIC + (1 | FemaleID) + (1 | Block)
##        Df     AIC     BIC logLik deviance  Chisq Chi Df Pr(>Chisq)    
## ..1     3 -872.16 -857.81 439.08  -878.16                             
## object  5 -947.60 -923.67 478.80  -957.60 79.437      2  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
eggeccen4<- lmer(EggEccentricity~WCa_pHDIC + (1|FemaleID), data=CalEggs)
anova(eggeccen2, eggeccen4)#choose eggeccen4
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggEccentricity ~ WCa_pHDIC + (1 | FemaleID)
## object: EggEccentricity ~ WCa_pHDIC + (1 | FemaleID) + (1 | Block)
##        Df     AIC     BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1     4 -949.48 -930.34 478.74  -957.48                        
## object  5 -947.60 -923.67 478.80  -957.60 0.123      1     0.7258
eggeccen5<- lmer(EggEccentricity ~ 1 + (1|FemaleID), data=CalEggs)
anova(eggeccen4, eggeccen5) #choose eggeccen5
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggEccentricity ~ 1 + (1 | FemaleID)
## object: EggEccentricity ~ WCa_pHDIC + (1 | FemaleID)
##        Df     AIC     BIC logLik deviance  Chisq Chi Df Pr(>Chisq)  
## ..1     3 -948.42 -934.06 477.21  -954.42                           
## object  4 -949.48 -930.34 478.74  -957.48 3.0635      1    0.08007 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(eggeccen5)
## summary from lme4 is returned
## some computational error has occurred in lmerTest
## Linear mixed model fit by REML ['lmerMod']
## Formula: EggEccentricity ~ 1 + (1 | FemaleID)
##    Data: CalEggs
## 
## REML criterion at convergence: -947.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.68207 -0.67048 -0.06141  0.63147  2.94534 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  FemaleID (Intercept) 0.003507 0.05922 
##  Residual             0.018714 0.13680 
## Number of obs: 885, groups:  FemaleID, 30
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  0.37252    0.01177   31.65
eggeccen6<- lm(EggEccentricity~FemaleID, data=CalEggs)
anova(eggeccen3, eggeccen6)
## Analysis of Variance Table
## 
## Model 1: EggEccentricity ~ WCa_pHDIC
## Model 2: EggEccentricity ~ FemaleID
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1    883 19.210                                  
## 2    855 16.003 28    3.2065 6.1183 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(eggeccen6)
## 
## Call:
## lm(formula = EggEccentricity ~ FemaleID, data = CalEggs)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37648 -0.09290 -0.00824  0.08657  0.39311 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.335311   0.024185  13.864  < 2e-16 ***
## FemaleIDCF01_B2  0.121992   0.034768   3.509 0.000474 ***
## FemaleIDCF02_B1 -0.055068   0.034478  -1.597 0.110591    
## FemaleIDCF02_B2  0.034152   0.034768   0.982 0.326245    
## FemaleIDCF03_B1  0.099931   0.034768   2.874 0.004151 ** 
## FemaleIDCF03_B2  0.042536   0.034768   1.223 0.221511    
## FemaleIDCF04_B1 -0.028397   0.034768  -0.817 0.414300    
## FemaleIDCF04_B2 -0.049178   0.034768  -1.414 0.157596    
## FemaleIDCF05_B1 -0.064377   0.034478  -1.867 0.062215 .  
## FemaleIDCF05_B2 -0.050705   0.034768  -1.458 0.145107    
## FemaleIDCF06_B1  0.102429   0.034478   2.971 0.003053 ** 
## FemaleIDCF07_B1 -0.050701   0.035076  -1.445 0.148699    
## FemaleIDCF07_B2  0.050535   0.034768   1.453 0.146458    
## FemaleIDCF08_B1  0.041736   0.034768   1.200 0.230317    
## FemaleIDEF01_B1 -0.003685   0.034768  -0.106 0.915607    
## FemaleIDEF01_B2  0.053702   0.034768   1.545 0.122822    
## FemaleIDEF02_B1  0.102168   0.034768   2.939 0.003387 ** 
## FemaleIDEF02_B2  0.113973   0.034768   3.278 0.001087 ** 
## FemaleIDEF03_B1  0.068439   0.034768   1.968 0.049343 *  
## FemaleIDEF03_B2  0.129794   0.034768   3.733 0.000202 ***
## FemaleIDEF04_B1  0.051040   0.034768   1.468 0.142470    
## FemaleIDEF04_B2  0.028669   0.034768   0.825 0.409843    
## FemaleIDEF05_B1 -0.048820   0.034768  -1.404 0.160635    
## FemaleIDEF05_B2  0.016992   0.034768   0.489 0.625162    
## FemaleIDEF06_B1  0.116906   0.034768   3.362 0.000807 ***
## FemaleIDEF06_B2  0.040295   0.034768   1.159 0.246804    
## FemaleIDEF07_B1  0.009502   0.047818   0.199 0.842532    
## FemaleIDEF07_B2  0.140033   0.034768   4.028 6.14e-05 ***
## FemaleIDEF08_B1 -0.016346   0.034768  -0.470 0.638376    
## FemaleIDEF08_B2  0.113611   0.034768   3.268 0.001128 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1368 on 855 degrees of freedom
## Multiple R-squared:  0.1827, Adjusted R-squared:  0.155 
## F-statistic: 6.591 on 29 and 855 DF,  p-value: < 2.2e-16
#look at assumptions of eggeccen6
par(mfcol=c(2,2))
plot(eggeccen6)

par(mfcol=c(1,1))
acf(eggeccen6)
## Error in complete.cases(object): not all arguments have the same length
#FemaleID matters but not block, treatment or adult length. Same as diameter

plot(EggEccentricity~FemaleID, data=CalEggs)

Look at the water chemistry data for Jars

#start with omega calcite
plot(JarOmegaCalcite~JarTrt, data=LarvByJarDat)

#remove the Jars that have their pH as 0/NA
CalDat<- subset(LarvByJarDat, JarOmegaCalcite!= "NA")

#I was having trouble with making this model: JarOmegaCalcite~JarTrt*ParentTrt*TimeFilter*BlockID, so I looked at TimeFilter because I think that is the problem. 
Jar1<- lm(JarOmegaCalcite~TimeFilter, data=CalDat)
par(mfcol=c(2,2))
plot(Jar1)
## Warning: not plotting observations with leverage one:
##   1, 4, 5, 6, 7, 8, 13, 15, 17, 18, 22, 23, 29, 34, 36, 48, 49, 52, 53, 57, 58, 62, 64, 66, 90, 91, 94, 95, 100, 103, 111, 126, 135, 141, 142, 150, 153, 161, 164, 168, 171, 178, 182, 186, 193, 195, 199, 215, 225, 226, 241, 247, 253, 257, 258, 266, 269, 270, 271, 277, 282, 283, 294, 297, 298, 302, 312, 313, 314, 316, 317, 318, 319, 322, 323, 326, 334, 342, 345, 349, 352, 357, 364, 367, 372, 374, 375, 380

## Warning: not plotting observations with leverage one:
##   1, 4, 5, 6, 7, 8, 13, 15, 17, 18, 22, 23, 29, 34, 36, 48, 49, 52, 53, 57, 58, 62, 64, 66, 90, 91, 94, 95, 100, 103, 111, 126, 135, 141, 142, 150, 153, 161, 164, 168, 171, 178, 182, 186, 193, 195, 199, 215, 225, 226, 241, 247, 253, 257, 258, 266, 269, 270, 271, 277, 282, 283, 294, 297, 298, 302, 312, 313, 314, 316, 317, 318, 319, 322, 323, 326, 334, 342, 345, 349, 352, 357, 364, 367, 372, 374, 375, 380
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

par(mfcol=c(1,1))
#looks like we can't use timefilter because it is too specific to each jar. Get the error message "observations with leverage one"; try using seatable instead

Jar2<- lmer(JarOmegaCalcite~JarTrt*JarSeatable + (1|Block), data=CalDat)

#check assumptions 
plot(Jar2)

qqnorm(resid(Jar2))
qqline(resid(Jar2))

#looks good to me. 

Jar2step<-step(Jar2)
print(Jar2step)
## 
## Random effects:
##        Chi.sq Chi.DF elim.num p.value
## Block 1324.18      1     kept < 1e-07
## 
## Fixed effects:
##                     Sum Sq Mean Sq NumDF  DenDF     F.value elim.num
## JarTrt             48.5911 48.5911     1 374.71 275385.9221     kept
## JarSeatable         0.0020  0.0010     2 374.70      5.5659     kept
## JarTrt:JarSeatable  0.0013  0.0007     2 374.70      3.6907     kept
##                    Pr(>F)
## JarTrt             <1e-07
## JarSeatable        0.0041
## JarTrt:JarSeatable 0.0259
## 
## Least squares means:
##                               JarTrt JarSeatable Estimate Standard Error
## JarTrt  Control                  1.0          NA    1.182          0.096
## JarTrt  Exposed                  2.0          NA    0.334          0.096
## JarSeatable  2                    NA         1.0    0.755          0.096
## JarSeatable  4                    NA         2.0    0.758          0.096
## JarSeatable  6                    NA         3.0    0.761          0.096
## JarTrt:JarSeatable  Control 2    1.0         1.0    1.177          0.096
## JarTrt:JarSeatable  Exposed 2    2.0         1.0    0.333          0.096
## JarTrt:JarSeatable  Control 4    1.0         2.0    1.182          0.096
## JarTrt:JarSeatable  Exposed 4    2.0         2.0    0.334          0.096
## JarTrt:JarSeatable  Control 6    1.0         3.0    1.188          0.096
## JarTrt:JarSeatable  Exposed 6    2.0         3.0    0.334          0.096
##                                DF t-value Lower CI Upper CI p-value   
## JarTrt  Control               2.9   12.32   0.8699    1.495  0.0014 **
## JarTrt  Exposed               2.9    3.48   0.0213    0.646  0.0427 * 
## JarSeatable  2                2.9    7.87   0.4429    1.068  0.0049 **
## JarSeatable  4                2.9    7.89   0.4452    1.070  0.0049 **
## JarSeatable  6                2.9    7.93   0.4487    1.074  0.0048 **
## JarTrt:JarSeatable  Control 2 2.9   12.26   0.8650    1.490  0.0014 **
## JarTrt:JarSeatable  Exposed 2 2.9    3.47   0.0209    0.646  0.0428 * 
## JarTrt:JarSeatable  Control 4 2.9   12.31   0.8694    1.494  0.0014 **
## JarTrt:JarSeatable  Exposed 4 2.9    3.47   0.0211    0.646  0.0427 * 
## JarTrt:JarSeatable  Control 6 2.9   12.37   0.8755    1.500  0.0014 **
## JarTrt:JarSeatable  Exposed 6 2.9    3.48   0.0219    0.647  0.0424 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Differences of LSMEANS:
##                                            Estimate Standard Error    DF
## JarTrt Control - Exposed                        0.8         0.0016 374.7
## JarSeatable 2 - 4                               0.0         0.0017 374.7
## JarSeatable 2 - 6                               0.0         0.0018 374.7
## JarSeatable 4 - 6                               0.0         0.0017 374.7
## JarTrt:JarSeatable  Control 2 -  Exposed 2      0.8         0.0026 374.7
## JarTrt:JarSeatable  Control 2 -  Control 4      0.0         0.0028 374.7
## JarTrt:JarSeatable  Control 2 -  Exposed 4      0.8         0.0026 374.7
## JarTrt:JarSeatable  Control 2 -  Control 6      0.0         0.0028 374.7
## JarTrt:JarSeatable  Control 2 -  Exposed 6      0.8         0.0026 374.7
## JarTrt:JarSeatable  Exposed 2 -  Control 4     -0.8         0.0026 374.7
## JarTrt:JarSeatable  Exposed 2 -  Exposed 4      0.0         0.0021 374.7
## JarTrt:JarSeatable  Exposed 2 -  Control 6     -0.9         0.0026 374.7
## JarTrt:JarSeatable  Exposed 2 -  Exposed 6      0.0         0.0021 374.7
## JarTrt:JarSeatable  Control 4 -  Exposed 4      0.8         0.0026 374.7
## JarTrt:JarSeatable  Control 4 -  Control 6      0.0         0.0028 374.7
## JarTrt:JarSeatable  Control 4 -  Exposed 6      0.8         0.0026 374.7
## JarTrt:JarSeatable  Exposed 4 -  Control 6     -0.9         0.0026 374.7
## JarTrt:JarSeatable  Exposed 4 -  Exposed 6      0.0         0.0021 374.7
## JarTrt:JarSeatable  Control 6 -  Exposed 6      0.9         0.0026 374.7
##                                            t-value Lower CI Upper CI
## JarTrt Control - Exposed                    524.77   0.8454   0.8518
## JarSeatable 2 - 4                            -1.35  -0.0058   0.0011
## JarSeatable 2 - 6                            -3.32  -0.0093  -0.0024
## JarSeatable 4 - 6                            -1.98  -0.0069   0.0000
## JarTrt:JarSeatable  Control 2 -  Exposed 2  327.04   0.8390   0.8491
## JarTrt:JarSeatable  Control 2 -  Control 4   -1.57  -0.0099   0.0011
## JarTrt:JarSeatable  Control 2 -  Exposed 4  328.88   0.8387   0.8488
## JarTrt:JarSeatable  Control 2 -  Control 6   -3.75  -0.0161  -0.0050
## JarTrt:JarSeatable  Control 2 -  Exposed 6  325.63   0.8379   0.8481
## JarTrt:JarSeatable  Exposed 2 -  Control 4 -328.75  -0.8535  -0.8434
## JarTrt:JarSeatable  Exposed 2 -  Exposed 4   -0.14  -0.0043   0.0038
## JarTrt:JarSeatable  Exposed 2 -  Control 6 -328.94  -0.8597  -0.8495
## JarTrt:JarSeatable  Exposed 2 -  Exposed 6   -0.51  -0.0052   0.0030
## JarTrt:JarSeatable  Control 4 -  Exposed 4  330.60   0.8431   0.8532
## JarTrt:JarSeatable  Control 4 -  Control 6   -2.18  -0.0117  -0.0006
## JarTrt:JarSeatable  Control 4 -  Exposed 6  327.33   0.8423   0.8525
## JarTrt:JarSeatable  Exposed 4 -  Control 6 -330.77  -0.8594  -0.8492
## JarTrt:JarSeatable  Exposed 4 -  Exposed 6   -0.38  -0.0048   0.0033
## JarTrt:JarSeatable  Control 6 -  Exposed 6  327.53   0.8484   0.8587
##                                            p-value    
## JarTrt Control - Exposed                    <2e-16 ***
## JarSeatable 2 - 4                            0.178    
## JarSeatable 2 - 6                            0.001 ***
## JarSeatable 4 - 6                            0.048 *  
## JarTrt:JarSeatable  Control 2 -  Exposed 2  <2e-16 ***
## JarTrt:JarSeatable  Control 2 -  Control 4   0.117    
## JarTrt:JarSeatable  Control 2 -  Exposed 4  <2e-16 ***
## JarTrt:JarSeatable  Control 2 -  Control 6   2e-04 ***
## JarTrt:JarSeatable  Control 2 -  Exposed 6  <2e-16 ***
## JarTrt:JarSeatable  Exposed 2 -  Control 4  <2e-16 ***
## JarTrt:JarSeatable  Exposed 2 -  Exposed 4   0.890    
## JarTrt:JarSeatable  Exposed 2 -  Control 6  <2e-16 ***
## JarTrt:JarSeatable  Exposed 2 -  Exposed 6   0.609    
## JarTrt:JarSeatable  Control 4 -  Exposed 4  <2e-16 ***
## JarTrt:JarSeatable  Control 4 -  Control 6   0.030 *  
## JarTrt:JarSeatable  Control 4 -  Exposed 6  <2e-16 ***
## JarTrt:JarSeatable  Exposed 4 -  Control 6  <2e-16 ***
## JarTrt:JarSeatable  Exposed 4 -  Exposed 6   0.707    
## JarTrt:JarSeatable  Control 6 -  Exposed 6  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Final model:
## lme4::lmer(formula = JarOmegaCalcite ~ JarTrt * JarSeatable + 
##     (1 | Block), data = CalDat, contrasts = list(JarTrt = "contr.SAS", 
##     JarSeatable = "contr.SAS"))
#final model chosen: JarpHCorrSW~JarTrt*JarSeatable + (1|Block)

Jar3<- lmer(JarOmegaCalcite~JarTrt*JarSeatable + (1|Block), data=CalDat)
#check assumptions
qqnorm(resid(Jar3))
qqline(resid(Jar3))

plot(Jar3) #may want to look at this further

summary(Jar3)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: JarOmegaCalcite ~ JarTrt * JarSeatable + (1 | Block)
##    Data: CalDat
## 
## REML criterion at convergence: -2142.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -8.1968 -0.0456  0.0129  0.0345 14.2111 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev.
##  Block    (Intercept) 0.0184273 0.13575 
##  Residual             0.0001764 0.01328 
## Number of obs: 381, groups:  Block, 2
## 
## Fixed effects:
##                              Estimate Std. Error         df  t value
## (Intercept)                  1.177333   0.096012   2.500000   12.262
## JarTrtExposed               -0.844042   0.002581 374.700000 -327.042
## JarSeatable4                 0.004403   0.002800 374.700000    1.572
## JarSeatable6                 0.010553   0.002816 374.700000    3.747
## JarTrtExposed:JarSeatable4  -0.004117   0.003478 374.700000   -1.184
## JarTrtExposed:JarSeatable6  -0.009489   0.003502 374.700000   -2.710
##                            Pr(>|t|)    
## (Intercept)                0.002503 ** 
## JarTrtExposed               < 2e-16 ***
## JarSeatable4               0.116690    
## JarSeatable6               0.000207 ***
## JarTrtExposed:JarSeatable4 0.237239    
## JarTrtExposed:JarSeatable6 0.007038 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) JrTrtE JrStb4 JrStb6 JTE:JS4
## JarTrtExpsd -0.018                             
## JarSeatabl4 -0.015  0.543                      
## JarSeatabl6 -0.015  0.539  0.497               
## JrTrtEx:JS4  0.012 -0.678 -0.805 -0.400        
## JrTrtEx:JS6  0.012 -0.671 -0.400 -0.804  0.499
#JarTrt and JarSeatable are both significant for JarpH and there is a significant interaction, block is also a significant but I don't know what to do about only having Exposed jars from Block2. We were expecting JarTrt, but not JarSeatable. 

Visualize the data for larvae morphology using the subset data

ggplot(LarvaeDat, aes(x=LarvaeDiamum, color=JarTrt))+
  geom_histogram(alpha=0.5, position="identity") +
  facet_grid(ParentTrt~Block)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#interesting bimodality in B2
larvmeans<- aggregate(LarvaeDiamum~ParentTrt*JarTrt, data=LarvaeDat, FUN=mean)#CP/CL : 75.24389; EP/CL: 75.46671; CP/EL: 65.47536; EP/EL: 66.48770
larvmedians<- aggregate(LarvaeDiamum~ParentTrt*JarTrt, data=LarvaeDat, FUN=median)#CP/CL : 75.29579; EP/CL: 75.52886; CP/EL: 65.27141; EP/EL: 66.86843
#I think we can use the mean for these data 

Look at the larvae diameter data

#create most complex model and then use the step function to see which model is best fit for the data; these are for ParentTrt and JarTrt as covariates. I decided to use pH since we have that for the all of the bottles (once we get the salinity to use) and the tanks. 
larvlen<-lmer(LarvaeDiamum~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID)+(1|TrtTankID)+(1|AccTankID)+(1|Block), data=LarvaeDat)

steppedlarvlen<-step(larvlen)
print(steppedlarvlen)
## 
## Random effects:
##             Chi.sq Chi.DF elim.num p.value
## TrtTankID     0.00      1        1  1.0000
## AccTankID     0.00      1        2  1.0000
## Block         0.00      1        3  1.0000
## CrossID       1.62      1        4  0.2028
## FemaleID    107.79      1     kept  <1e-07
## MaleID      114.97      1     kept  <1e-07
## JarSeatable   4.75      1     kept  0.0293
## JarID       608.68      1     kept  <1e-07
## 
## Fixed effects:
##                              Sum Sq   Mean Sq NumDF  DenDF  F.value
## WCa_pHDIC                    3.3920    3.3920     1  37.16   0.3653
## JarOmegaCalcite           2192.1164 2192.1164     1 312.74 236.0772
## WCa_pHDIC:JarOmegaCalcite   50.0248   50.0248     1 312.69   5.3874
##                           elim.num Pr(>F)
## WCa_pHDIC                     kept 0.5492
## JarOmegaCalcite               kept <1e-07
## WCa_pHDIC:JarOmegaCalcite     kept 0.0209
## 
## Least squares means:
##      Estimate Standard Error DF t-value Lower CI Upper CI p-value
## 
##  Differences of LSMEANS:
##      Estimate Standard Error DF t-value Lower CI Upper CI p-value
## 
## Final model:
## lme4::lmer(formula = LarvaeDiamum ~ WCa_pHDIC + JarOmegaCalcite + 
##     (1 | FemaleID) + (1 | MaleID) + (1 | JarSeatable) + (1 | 
##     JarID) + WCa_pHDIC:JarOmegaCalcite, data = LarvaeDat)
#final model chosen by the lme4 step function: y~WCa_pHDIC*JarOmegaCalcite +(1|FemaleID)+(1|MaleID)+(1|JarID) + (1|JarSeatable)
larvlenfin<- lmer(LarvaeDiamum~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|JarSeatable)+(1|JarID), data=LarvaeDat)

#check the final model to see that it meets assumptions
plot(larvlenfin) #seems to meet assumption of linearity and homoscedasticity

qqnorm(resid(larvlenfin))
qqline(resid(larvlenfin)) #has relatively long tails, doesn't seem to meet assumption of normality. Check with Katie 

summary(larvlenfin)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: LarvaeDiamum ~ WCa_pHDIC * JarOmegaCalcite + (1 | FemaleID) +  
##     (1 | MaleID) + (1 | JarSeatable) + (1 | JarID)
##    Data: LarvaeDat
## 
## REML criterion at convergence: 19795.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.4932 -0.5235  0.0475  0.5853  4.5079 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  JarID       (Intercept) 4.0066   2.0017  
##  FemaleID    (Intercept) 6.1849   2.4869  
##  MaleID      (Intercept) 6.5666   2.5625  
##  JarSeatable (Intercept) 0.1607   0.4008  
##  Residual                9.2856   3.0472  
## Number of obs: 3758, groups:  
## JarID, 377; FemaleID, 28; MaleID, 24; JarSeatable, 3
## 
## Fixed effects:
##                           Estimate Std. Error       df t value Pr(>|t|)
## (Intercept)                62.8701     1.5756  36.2000  39.902   <2e-16
## WCa_pHDIC                  -1.0075     1.6669  37.1600  -0.604   0.5492
## JarOmegaCalcite            10.8540     0.7064 312.7400  15.365   <2e-16
## WCa_pHDIC:JarOmegaCalcite   1.7927     0.7724 312.6900   2.321   0.0209
##                              
## (Intercept)               ***
## WCa_pHDIC                    
## JarOmegaCalcite           ***
## WCa_pHDIC:JarOmegaCalcite *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) WC_pHDIC JrOmgC
## WCa_pHDIC   -0.866                
## JarOmegClct -0.243  0.221         
## WC_HDIC:JOC  0.215 -0.246   -0.893
#signficant effect of Jar calcite sat, and sig. interaction between parent and Jar saturation states

#look at autocorrelation. 
acf(resid(larvlenfin)) #this looks good 

#predit some of the data
LarvaeDat$DiamPredict<-predict(larvlenfin)

Do the Larvae analysis without Block2

#Try without B2 data

larvlenB1<-lmer(LarvaeDiamum~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID)+(1|TrtTankID)+(1|AccTankID), data=LarvaeDatB1)

steppedlarvlenB1<-step(larvlenB1)
print(steppedlarvlenB1)
## 
## Random effects:
##             Chi.sq Chi.DF elim.num p.value
## TrtTankID     0.00      1        1  1.0000
## AccTankID     0.70      1        2  0.4035
## CrossID       2.07      1        3  0.1507
## FemaleID     34.48      1     kept  <1e-07
## MaleID       12.38      1     kept  0.0004
## JarSeatable   6.19      1     kept  0.0128
## JarID       192.38      1     kept  <1e-07
## 
## Fixed effects:
##                              Sum Sq   Mean Sq NumDF  DenDF  F.value
## WCa_pHDIC                   54.3154   54.3154     1  24.34   7.0288
## JarOmegaCalcite           3719.8750 3719.8750     1 240.71 481.3803
## WCa_pHDIC:JarOmegaCalcite   86.7777   86.7777     1 240.67  11.2297
##                           elim.num Pr(>F)
## WCa_pHDIC                     kept 0.0139
## JarOmegaCalcite               kept <1e-07
## WCa_pHDIC:JarOmegaCalcite     kept 0.0009
## 
## Least squares means:
##      Estimate Standard Error DF t-value Lower CI Upper CI p-value
## 
##  Differences of LSMEANS:
##      Estimate Standard Error DF t-value Lower CI Upper CI p-value
## 
## Final model:
## lme4::lmer(formula = LarvaeDiamum ~ WCa_pHDIC + JarOmegaCalcite + 
##     (1 | FemaleID) + (1 | MaleID) + (1 | JarSeatable) + (1 | 
##     JarID) + WCa_pHDIC:JarOmegaCalcite, data = LarvaeDatB1)
larvlenB1fin<- lmer(LarvaeDiamum~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|JarID), data=LarvaeDatB1)
#this is the same final model as with both blocks

#check the final model to see that it meets assumptions
plot(larvlenB1fin) #seems to meet assumption of linearity and homoscedasticity

qqnorm(resid(larvlenB1fin))
qqline(resid(larvlenB1fin)) #has relatively long tails, Katie, what do you think? 

#look at autocorrelation. 
acf(resid(larvlenB1fin)) #this looks good 

summary(larvlenB1fin)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: LarvaeDiamum ~ WCa_pHDIC * JarOmegaCalcite + (1 | FemaleID) +  
##     (1 | MaleID) + (1 | JarID)
##    Data: LarvaeDatB1
## 
## REML criterion at convergence: 13527.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8586 -0.5448  0.0468  0.6013  4.8986 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  JarID    (Intercept) 1.7328   1.3163  
##  FemaleID (Intercept) 0.7665   0.8755  
##  MaleID   (Intercept) 0.3359   0.5796  
##  Residual             7.7275   2.7798  
## Number of obs: 2698, groups:  JarID, 270; FemaleID, 15; MaleID, 11
## 
## Fixed effects:
##                           Estimate Std. Error       df t value Pr(>|t|)
## (Intercept)                63.7253     0.7366  25.3300  86.516  < 2e-16
## WCa_pHDIC                  -2.1639     0.8177  24.4900  -2.646  0.01400
## JarOmegaCalcite            10.8516     0.5041 242.6900  21.527  < 2e-16
## WCa_pHDIC:JarOmegaCalcite   1.8018     0.5511 242.6600   3.270  0.00123
##                              
## (Intercept)               ***
## WCa_pHDIC                 *  
## JarOmegaCalcite           ***
## WCa_pHDIC:JarOmegaCalcite ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) WC_pHDIC JrOmgC
## WCa_pHDIC   -0.889                
## JarOmegClct -0.453  0.398         
## WC_HDIC:JOC  0.404 -0.444   -0.893
#if we just look at block 1, both main effects are significant and their interaction. 

Visualize the larvae length data using continuous variables

#plot the data using the means for each jar
ggplot(LarvaeDat,
       aes(fill = WCa_pHDIC, y = LarvaeDiamum, x = JarOmegaCalcite)) +
  geom_point(aes(shape= Block, colour=WCa_pHDIC)) +
  ggtitle("Larvae diameter by larvae jar saturation state")+ 
  ylab("Larvae Diameter (um)")+
  xlab("Larvae Jar Calcite Saturation State")+
  theme_classic()

#the reason there is spread is that for the jars that we actually measured carbonate dynamics I used those values, for all other jars, I used the average of those measurements

#Try to visualize it in a more pleasant way
#get means and SE of larvae diameters
meanlarv<- ddply(LarvaeDat, .(JarTrt, ParentTrt), numcolwise(mean, na.rm=T))
selarv<- ddply(LarvaeDat, .(JarTrt, ParentTrt), numcolwise(se, na.rm=T))

#plot the mean Larvae Diameter +/- SE
ggplot(data = meanlarv, aes(x = JarOmegaCalcite, y = LarvaeDiamum, group=ParentTrt, fill=ParentTrt)) +
  geom_errorbar(aes(ymin= meanlarv$LarvaeDiamum-selarv$LarvaeDiamum, ymax=meanlarv$LarvaeDiamum+selarv$LarvaeDiamum),width=0.05, position=position_dodge(0))+
   geom_errorbarh(aes(xmin= meanlarv$JarOmegaCalcite-selarv$JarOmegaCalcite, xmax=meanlarv$JarOmegaCalcite+selarv$JarOmegaCalcite),width=0.05, position=position_dodge(0))+
  geom_point(aes(colour=ParentTrt))+
  labs(x = "Larvae Jar Saturation State (Calcite)", y = "Larvae Diameter (um)") +
  scale_color_manual(values = c("skyblue2", "red2")) +
theme_classic()
## Warning: Ignoring unknown parameters: width
## Warning: position_dodge requires non-overlapping x intervals

## Warning: position_dodge requires non-overlapping x intervals

#should we add a model line to this? 

#alternatively, I could visualize with bargraphs: 
ggplot(data = meanlarv, aes(x = JarTrt, y = LarvaeDiamum, fill=ParentTrt)) +
  geom_bar(stat="identity", aes(fill=ParentTrt), position="dodge")+
  labs(x = "Larvae Treatment", y = "Larvae Diameter (um)") +
  scale_fill_manual(name = "Parent Treatment", labels = c("Control", "OA Exposed"), values = c("skyblue2", "red2")) +
  geom_errorbar(aes(ymin= meanlarv$LarvaeDiamum-selarv$LarvaeDiamum, ymax=meanlarv$LarvaeDiamum+selarv$LarvaeDiamum),width=0.2, position=position_dodge(.9))+
  theme_classic()

#plot with parent treatment on the x-axis

ggplot(data = meanlarv, aes(x = ParentTrt , y = LarvaeDiamum, fill=JarTrt)) +
  geom_bar(stat="identity", aes(fill=JarTrt), position="dodge")+
  labs(x = "Parent Treatment", y = "Larvae Diameter (um)") +
  scale_fill_manual(name = "Larvae Treatment", labels = c("Control", "OA Exposed"), values = c("skyblue2", "red2")) +
  geom_errorbar(aes(ymin= meanlarv$LarvaeDiamum-selarv$LarvaeDiamum, ymax=meanlarv$LarvaeDiamum+selarv$LarvaeDiamum),width=0.2, position=position_dodge(.9))+
  theme_classic()

#now plot the difference between parental treatments for larvae diameter
crossdiff <- data.frame(tapply(LarvaeDat$LarvaeDiamum, list(LarvaeDat$CrossID, LarvaeDat$JarTrt), mean)) %>%
  rownames_to_column("CrossID") %>%
  mutate(CrossTemp = CrossID) %>%
  mutate(ParentTrt = ifelse(startsWith(CrossID, "E"), "Exposed", "Control")) %>%
  separate(CrossTemp, c("FemaleID", "MaleID", "BlockID", "Fourth"), sep="_") %>% select(-Fourth) %>%
  mutate(Diff = Exposed - Control)
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1 rows [1].
crossdiff$CrossID<- as.factor(crossdiff$CrossID)
crossdiff$FemaleID<-as.factor(crossdiff$FemaleID)
crossdiff$MaleID<-as.factor(crossdiff$MaleID)

#look at cross differences by Male and Female ID
ggplot(data = crossdiff, aes(x = MaleID, y = Diff)) + 
  geom_point(aes(colour=FemaleID)) + 
  labs(x = "Male ID", y = "Change in mean shell length (um) per family\nfrom control to OA conditions") + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), panel.background = element_blank(), 
        axis.line = element_line(colour = "black"), panel.border=element_rect(colour="black", fill=NA))+
   theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
## Warning: Removed 69 rows containing missing values (geom_point).

#get the means of the differences to visualize overall differences
meandiff<- aggregate(Diff~ParentTrt, data=crossdiff, FUN=mean)
sediff<-  aggregate(Diff~ParentTrt, data=crossdiff, FUN=se)

ggplot(data = meandiff, aes(x = ParentTrt, y = Diff)) + 
  geom_bar(stat="identity", aes(fill="gray45"), position="dodge") + 
  labs(x = "Parental Treatment", y = "Change in mean shell length (um) per family\nfrom control to OA conditions") + 
  scale_x_discrete(breaks = c("Control", "Exposed"), labels = c("Control", "OA Exposed")) + 
  scale_fill_manual(values = "gray45") + 
  geom_errorbar(aes(ymin= meandiff$Diff-sediff$Diff, ymax=meandiff$Diff+sediff$Diff),width=0.2, position=position_dodge(.9))+
  theme(legend.position = "none", panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), panel.background = element_blank(), 
        axis.line = element_line(colour = "black"))

diffttest<- t.test(Diff~ParentTrt, data=crossdiff, var.equal=T)
diffttest #mean shell length difference is significantly higher for OA parent treatment
## 
##  Two Sample t-test
## 
## data:  Diff by ParentTrt
## t = -2.6006, df = 43, p-value = 0.01271
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.1378060 -0.2703394
## sample estimates:
## mean in group Control mean in group Exposed 
##            -11.049847             -9.845775

Analyze at growth per day

larvgrow<-lmer(GrowthPerDay~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID)+(1|TrtTankID)+(1|AccTankID)+(1|Block) , data=LarvaeDat)

steppedlarvgrow<- step(larvgrow)
print(steppedlarvgrow)
## 
## Random effects:
##             Chi.sq Chi.DF elim.num p.value
## TrtTankID     0.00      1        1  1.0000
## AccTankID     0.00      1        2  1.0000
## Block         0.00      1        3  1.0000
## CrossID       1.13      1        4  0.2878
## FemaleID    174.50      1     kept  <1e-07
## MaleID      118.41      1     kept  <1e-07
## JarSeatable   4.85      1     kept  0.0277
## JarID       606.52      1     kept  <1e-07
## 
## Fixed effects:
##                             Sum Sq  Mean Sq NumDF  DenDF  F.value elim.num
## WCa_pHDIC                   1.9298   1.9298     1  39.45   1.8705     kept
## JarOmegaCalcite           246.3220 246.3220     1 315.88 238.7510     kept
## WCa_pHDIC:JarOmegaCalcite   5.5426   5.5426     1 315.82   5.3722     kept
##                           Pr(>F)
## WCa_pHDIC                 0.1792
## JarOmegaCalcite           <1e-07
## WCa_pHDIC:JarOmegaCalcite 0.0211
## 
## Least squares means:
##      Estimate Standard Error DF t-value Lower CI Upper CI p-value
## 
##  Differences of LSMEANS:
##      Estimate Standard Error DF t-value Lower CI Upper CI p-value
## 
## Final model:
## lme4::lmer(formula = GrowthPerDay ~ WCa_pHDIC + JarOmegaCalcite + 
##     (1 | FemaleID) + (1 | MaleID) + (1 | JarSeatable) + (1 | 
##     JarID) + WCa_pHDIC:JarOmegaCalcite, data = LarvaeDat)
#final model chosen by the lme4 step function: y~pHSWCorr*JarOmegaCalcite+(1|FemaleID)+(1|MaleID)+ (1|JarID)
larvgrowfin<- lmer(GrowthPerDay~WCa_pHDIC*JarOmegaCalcite+(1|FemaleID)+(1|MaleID)+(1|JarID)+ (1|JarSeatable), data=LarvaeDat)


#check the final model to see that it meets assumptions
plot(larvgrowfin) #seems to meet assumption of linearity and homoscedasticity

qqnorm(resid(larvgrowfin))
qqline(resid(larvgrowfin)) #has relatively long tails, doesn't seem to meet assumption of normality. 

summary(larvgrowfin)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: GrowthPerDay ~ WCa_pHDIC * JarOmegaCalcite + (1 | FemaleID) +  
##     (1 | MaleID) + (1 | JarID) + (1 | JarSeatable)
##    Data: LarvaeDat
## 
## REML criterion at convergence: 11550.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.4955 -0.5203  0.0481  0.5852  4.5120 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  JarID       (Intercept) 0.44078  0.6639  
##  FemaleID    (Intercept) 0.81313  0.9017  
##  MaleID      (Intercept) 0.79998  0.8944  
##  JarSeatable (Intercept) 0.01798  0.1341  
##  Residual                1.03171  1.0157  
## Number of obs: 3758, groups:  
## JarID, 377; FemaleID, 28; MaleID, 24; JarSeatable, 3
## 
## Fixed effects:
##                           Estimate Std. Error       df t value Pr(>|t|)
## (Intercept)                 0.1795     0.5554  38.2400   0.323   0.7484
## WCa_pHDIC                  -0.8050     0.5886  39.4500  -1.368   0.1792
## JarOmegaCalcite             3.6244     0.2346 315.8800  15.452   <2e-16
## WCa_pHDIC:JarOmegaCalcite   0.5944     0.2565 315.8200   2.318   0.0211
##                              
## (Intercept)                  
## WCa_pHDIC                    
## JarOmegaCalcite           ***
## WCa_pHDIC:JarOmegaCalcite *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) WC_pHDIC JrOmgC
## WCa_pHDIC   -0.867                
## JarOmegClct -0.229  0.208         
## WC_HDIC:JOC  0.202 -0.231   -0.893
#looks like no collinearity between predictors

#look at autocorrelation. 
acf(resid(larvgrowfin)) #this looks fine to me. 

#significant main effect of JarOmegaCalcite and significant interaction 

#plot growth per day
ggplot(LarvaeDat,
       aes(fill = WCa_pHDIC, y = GrowthPerDay, x = JarOmegaCalcite)) +
  geom_point(aes(shape= Block, colour=WCa_pHDIC)) +
  ggtitle("Larvae growth by parent treatment")+ 
  ylab("Growth (um/day)")+
  theme_classic()

#get means and SE of larvae growth
meangrowth<- aggregate(GrowthPerDay~ParentTrt*JarTrt, data=LarvaeDat, FUN=mean)
segrowth<- aggregate(GrowthPerDay~ParentTrt*JarTrt, data=LarvaeDat, FUN=se)

#use a bargraph to plot the mean Larvae Diameter +/- SE
ggplot(data = meangrowth, aes(x = ParentTrt, y = GrowthPerDay, fill=JarTrt)) +
  geom_bar(stat="identity", aes(fill=JarTrt), position="dodge")+
  labs(x = "Parent Treatment", y = "Larvae Growth (um/day)") +
  scale_fill_manual(name = "Larvae Treatment", labels = c("Control", "OA Exposed"), values = c("skyblue2", "red2")) +
  geom_errorbar(aes(ymin= meangrowth$GrowthPerDay-segrowth$GrowthPerDay, ymax=meangrowth$GrowthPerDay+segrowth$GrowthPerDay),width=0.2, position=position_dodge(.9))+
  theme_classic()

#now plot the difference between parental treatments for larvae growth

growthdiff <- data.frame(tapply(LarvaeDat$GrowthPerDay, list(LarvaeDat$CrossID, LarvaeDat$JarTrt), mean)) %>%
  rownames_to_column("CrossID") %>%
  mutate(CrossTemp = CrossID) %>%
  mutate(ParentTrt = ifelse(startsWith(CrossID, "E"), "Exposed", "Control")) %>%
  separate(CrossTemp, c("FemaleID", "MaleID", "BlockID", "Fourth"), sep="_") %>% select(-Fourth) %>%
  mutate(Diff = Exposed - Control)
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1 rows [1].
growthdiff$CrossID<- as.factor(growthdiff$CrossID)
growthdiff$FemaleID<-as.factor(growthdiff$FemaleID)
growthdiff$MaleID<-as.factor(growthdiff$MaleID)

#look at cross differences by Male and Female ID
ggplot(data = growthdiff, aes(x = MaleID, y = Diff)) + 
  geom_point(aes(colour=FemaleID)) + 
  labs(x = "Male ID", y = "Change in larvae growth (um/day) per family\nfrom control to OA conditions") + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), panel.background = element_blank(), 
        axis.line = element_line(colour = "black"), panel.border=element_rect(colour="black", fill=NA))+
   theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
## Warning: Removed 69 rows containing missing values (geom_point).

#get the means of the differences to visualize overall differences
meangrdiff<- aggregate(Diff~ParentTrt, data=growthdiff, FUN=mean)
segrdiff<-  aggregate(Diff~ParentTrt, data=growthdiff, FUN=se)

ggplot(data = meangrdiff, aes(x = ParentTrt, y = Diff)) + 
  geom_bar(stat="identity", aes(fill="gray45"), position="dodge") + 
  labs(x = "Parental Treatment", y = "Change in larvae growth (um) per family\nfrom control to OA conditions") + 
  scale_x_discrete(breaks = c("Control", "Exposed"), labels = c("Control", "OA")) + 
  scale_fill_manual(values = "gray45") + 
  geom_errorbar(aes(ymin= meangrdiff$Diff-segrdiff$Diff, ymax=meangrdiff$Diff+segrdiff$Diff),width=0.2, position=position_dodge(.9))+
  theme(legend.position = "none", panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), panel.background = element_blank(), 
        axis.line = element_line(colour = "black"))

diffgrttest<- t.test(Diff~ParentTrt, data=growthdiff, var.equal=T)
diffgrttest #significant effect of parent treatment on growth. 
## 
##  Two Sample t-test
## 
## data:  Diff by ParentTrt
## t = -2.6006, df = 43, p-value = 0.01271
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.71260201 -0.09011314
## sample estimates:
## mean in group Control mean in group Exposed 
##             -3.683282             -3.281925

Look at larvae area

boxplot(LarvaeAreaum2~Block*ParentTrt, data=LarvaeDat)

#quick anova to look for potential differences
anovarea<- aov(LarvaeAreaum2~ParentTrt*JarTrt, data=LarvaeDat)
anova(anovarea)
## Analysis of Variance Table
## 
## Response: LarvaeAreaum2
##                    Df    Sum Sq   Mean Sq   F value  Pr(>F)    
## ParentTrt           1    188189    188189    1.1413 0.28545    
## JarTrt              1 755776265 755776265 4583.4692 < 2e-16 ***
## ParentTrt:JarTrt    1    551198    551198    3.3428 0.06758 .  
## Residuals        3754 619003638    164892                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
shapiro.test(anovarea$residuals) #doesn't meet assumption of normality
## 
##  Shapiro-Wilk normality test
## 
## data:  anovarea$residuals
## W = 0.99594, p-value = 1.257e-08
par(mfcol=c(2,2))
plot(anovarea)

par(mfcol=c(1,1))
#create most complex model and then use the step function to see which model is best fit for the data; these are for ParentTrt and JarTrt as fixed effects not covariates
larvarea<-lmer(LarvaeAreaum2~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID)+(1|TrtTankID)+(1|AccTankID)+(1|Block) , data=LarvaeDat)

steppedlarvarea<-step(larvarea)
print(steppedlarvarea)
## 
## Random effects:
##             Chi.sq Chi.DF elim.num p.value
## TrtTankID     0.00      1        1  1.0000
## Block         0.00      1        2  1.0000
## AccTankID     0.00      1        3  1.0000
## FemaleID     23.24      1     kept       0
## MaleID       29.00      1     kept  <1e-07
## CrossID       3.87      1     kept  0.0492
## JarSeatable   3.19      1     kept  0.0739
## JarID       542.13      1     kept  <1e-07
## 
## Fixed effects:
##                                 Sum Sq      Mean Sq NumDF  DenDF   F.value
## WCa_pHDIC:JarOmegaCalcite 2.417751e+05 2.417751e+05     1 277.52    3.2754
## WCa_pHDIC                 5.888224e+02 5.888224e+02     1  29.92    0.0080
## JarOmegaCalcite           1.434758e+08 1.434758e+08     1 278.07 1943.7314
##                           elim.num Pr(>F)
## WCa_pHDIC:JarOmegaCalcite        1 0.0714
## WCa_pHDIC                        2 0.9294
## JarOmegaCalcite               kept <1e-07
## 
## Least squares means:
##      Estimate Standard Error DF t-value Lower CI Upper CI p-value
## 
##  Differences of LSMEANS:
##      Estimate Standard Error DF t-value Lower CI Upper CI p-value
## 
## Final model:
## lme4::lmer(formula = LarvaeAreaum2 ~ JarOmegaCalcite + (1 | FemaleID) + 
##     (1 | MaleID) + (1 | CrossID) + (1 | JarSeatable) + (1 | JarID), 
##     data = LarvaeDat)
#final model chosen by the lme4 step function: y~JarOmegaCalcite+(1|FemaleID)+(1|MaleID)+ (1|JarSeatable)+(1|JarID)
larvareafin<- lmer(LarvaeAreaum2~JarOmegaCalcite+(1|FemaleID)+(1|MaleID)+ (1|JarSeatable)+(1|JarID), data=LarvaeDat)


#check the final model to see that it meets assumptions
plot(larvareafin) #seems to meet assumption of linearity and homoscedasticity

qqnorm(resid(larvareafin))
qqline(resid(larvareafin)) #has relatively long tails, doesn't seem to meet assumption of normality. 

summary(larvareafin)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: 
## LarvaeAreaum2 ~ JarOmegaCalcite + (1 | FemaleID) + (1 | MaleID) +  
##     (1 | JarSeatable) + (1 | JarID)
##    Data: LarvaeDat
## 
## REML criterion at convergence: 53546.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.0323 -0.5695  0.0387  0.6062  4.9336 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  JarID       (Intercept) 33966.7  184.30  
##  FemaleID    (Intercept) 44846.0  211.77  
##  MaleID      (Intercept) 47896.4  218.85  
##  JarSeatable (Intercept)   928.7   30.47  
##  Residual                73809.1  271.68  
## Number of obs: 3758, groups:  
## JarID, 377; FemaleID, 28; MaleID, 24; JarSeatable, 3
## 
## Fixed effects:
##                 Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)      2548.47      66.99   35.26   38.04   <2e-16 ***
## JarOmegaCalcite  1241.41      29.13  314.93   42.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## JarOmegClct -0.239
#look at autocorrelation. 
acf(resid(larvareafin)) #this looks fine to me. 

#plot the data using the means for each jar
ggplot(LarvaeDat,
       aes(fill = WCa_pHDIC, y = LarvaeAreaum2, x = JarOmegaCalcite)) +
  geom_point(aes(shape= Block, colour=WCa_pHDIC)) +
  ggtitle("Larvae area by larvae treatment")+ 
  ylab("Larvae Area (um2)")+
  theme_classic()

#get means and SE of larvae areas
meanarea<- aggregate(LarvaeAreaum2~ParentTrt*JarTrt, data=LarvaeDat, FUN=mean)
searea<- aggregate(LarvaeAreaum2~ParentTrt*JarTrt, data=LarvaeDat, FUN=se)

#use a bargraph to plot the mean Larvae area +/- SE
ggplot(data = meanarea, aes(x = ParentTrt, y = LarvaeAreaum2, fill=JarTrt)) +
  geom_bar(stat="identity", aes(fill=JarTrt), position="dodge")+
  labs(x = "Parent Treatment", y = "Larvae Area (um2)") +
  scale_fill_manual(name = "Larvae Treatment", labels = c("Control", "OA"), values = c("skyblue2", "red2")) +
  geom_errorbar(aes(ymin= meanarea$LarvaeAreaum2-searea$LarvaeAreaum2, ymax=meanarea$LarvaeAreaum2+searea$LarvaeAreaum2),width=0.2, position=position_dodge(.9))+
  theme_classic()

#now plot the difference between parental treatments for larvae area

areadiff <- data.frame(tapply(LarvaeDat$LarvaeAreaum2, list(LarvaeDat$CrossID, LarvaeDat$JarTrt), mean)) %>%
  rownames_to_column("CrossID") %>%
  mutate(CrossTemp = CrossID) %>%
  mutate(ParentTrt = ifelse(startsWith(CrossID, "E"), "Exposed", "Control")) %>%
  separate(CrossTemp, c("FemaleID", "MaleID", "BlockID", "Fourth"), sep="_") %>% select(-Fourth) %>%
  mutate(Diff = Exposed - Control)
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1 rows [1].
areadiff$CrossID<- as.factor(crossdiff$CrossID)
areadiff$FemaleID<-as.factor(crossdiff$FemaleID)
areadiff$MaleID<-as.factor(crossdiff$MaleID)

#look at cross differences by Male and Female ID
ggplot(data = areadiff, aes(x = MaleID, y = Diff)) + 
  geom_point(aes(colour=FemaleID)) + 
  labs(x = "Male ID", y = "Change in mean shell area (um2) per family\nfrom control to OA conditions") + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), panel.background = element_blank(), 
        axis.line = element_line(colour = "black"), panel.border=element_rect(colour="black", fill=NA))+
   theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
## Warning: Removed 69 rows containing missing values (geom_point).

#get the means of the differences to visualize overall differences
meanareadiff<- aggregate(Diff~ParentTrt, data=areadiff, FUN=mean)
seareadiff<-  aggregate(Diff~ParentTrt, data=areadiff, FUN=se)

ggplot(data = meanareadiff, aes(x = ParentTrt, y = Diff)) + 
  geom_bar(stat="identity", aes(fill="gray45"), position="dodge") + 
  labs(x = "Parental Treatment", y = "Change in mean shell area (um2) per family\nfrom control to OA conditions") + 
  scale_x_discrete(breaks = c("Control", "Exposed"), labels = c("Control", "OA")) + 
  scale_fill_manual(values = "gray45") + 
  geom_errorbar(aes(ymin= meanareadiff$Diff-seareadiff$Diff, ymax=meanareadiff$Diff+seareadiff$Diff),width=0.2, position=position_dodge(.9))+
  theme(legend.position = "none", panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), panel.background = element_blank(), 
        axis.line = element_line(colour = "black"))

diffareattest<- t.test(Diff~ParentTrt, data=areadiff)
diffareattest #mean shell area difference is not significantly higher for OA parent treatment
## 
##  Welch Two Sample t-test
## 
## data:  Diff by ParentTrt
## t = -1.7272, df = 39.691, p-value = 0.09192
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -168.53796   13.23648
## sample estimates:
## mean in group Control mean in group Exposed 
##             -1093.243             -1015.592

Look at growth in terms of area

#just subtract egg area from larvae area and then divide by 3
LarvaeDat$AreaGrowthPerDay<- (LarvaeDat$LarvaeAreaum2-LarvaeDat$EggAreaum2)/3

#create LMM for growth per day
larvgrowarea<-lmer(AreaGrowthPerDay~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID)+(1|TrtTankID)+(1|AccTankID)+(1|Block) , data=LarvaeDat)

steppedlarvgrowarea<- step(larvgrowarea)
print(steppedlarvgrowarea)
## 
## Random effects:
##             Chi.sq Chi.DF elim.num p.value
## Block         0.00      1        1  1.0000
## AccTankID     0.00      1        2  1.0000
## TrtTankID     0.00      1        3  1.0000
## FemaleID     35.74      1     kept  <1e-07
## MaleID       32.46      1     kept  <1e-07
## CrossID       3.11      1     kept  0.0778
## JarSeatable   3.26      1     kept  0.0712
## JarID       541.97      1     kept  <1e-07
## 
## Fixed effects:
##                               Sum Sq    Mean Sq NumDF  DenDF   F.value
## WCa_pHDIC:JarOmegaCalcite    26423.4    26423.4     1 278.47    3.2217
## WCa_pHDIC                    15058.6    15058.6     1  33.76    1.8361
## JarOmegaCalcite           15988833.1 15988833.1     1 279.17 1949.4845
##                           elim.num Pr(>F)
## WCa_pHDIC:JarOmegaCalcite        1 0.0738
## WCa_pHDIC                        2 0.1844
## JarOmegaCalcite               kept <1e-07
## 
## Least squares means:
##      Estimate Standard Error DF t-value Lower CI Upper CI p-value
## 
##  Differences of LSMEANS:
##      Estimate Standard Error DF t-value Lower CI Upper CI p-value
## 
## Final model:
## lme4::lmer(formula = AreaGrowthPerDay ~ JarOmegaCalcite + (1 | 
##     FemaleID) + (1 | MaleID) + (1 | CrossID) + (1 | JarSeatable) + 
##     (1 | JarID), data = LarvaeDat)
#final model chosen by the lme4 step function: y~JarOmegaCalcite+(1|FemaleID)+(1|CrossID)+ (1|MaleID)+ (1|JarSeatable)+(1|JarID)
larvgrowareafin<- lmer(AreaGrowthPerDay~JarTrt+(1|FemaleID)+(1|MaleID)+ (1|CrossID)+ (1|JarSeatable)+(1|JarID), data=LarvaeDat)


#check the final model to see that it meets assumptions
plot(larvgrowareafin) #seems to meet assumption of linearity and homoscedasticity

qqnorm(resid(larvgrowareafin))
qqline(resid(larvgrowareafin)) #has relatively long tails, doesn't seem to meet assumption of normality. 

summary(larvgrowareafin)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: AreaGrowthPerDay ~ JarTrt + (1 | FemaleID) + (1 | MaleID) + (1 |  
##     CrossID) + (1 | JarSeatable) + (1 | JarID)
##    Data: LarvaeDat
## 
## REML criterion at convergence: 45296.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.0718 -0.5668  0.0409  0.6045  4.9666 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  JarID       (Intercept) 3432.4   58.59   
##  CrossID     (Intercept)  572.7   23.93   
##  FemaleID    (Intercept) 6777.6   82.33   
##  MaleID      (Intercept) 5724.8   75.66   
##  JarSeatable (Intercept)  127.8   11.31   
##  Residual                8201.6   90.56   
## Number of obs: 3758, groups:  
## JarID, 377; CrossID, 81; FemaleID, 28; MaleID, 24; JarSeatable, 3
## 
## Fixed effects:
##               Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)    392.015     24.568   37.460   15.96   <2e-16 ***
## JarTrtExposed -351.498      7.916  281.180  -44.40   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## JarTrtExpsd -0.239
#look at autocorrelation. 
acf(resid(larvgrowareafin)) #this looks fine to me. 

#plot growth per day
ggplot(LarvaeDat,
       aes(fill = WCa_pHDIC, y = AreaGrowthPerDay, x = JarOmegaCalcite)) +
  geom_point(aes(shape= Block, colour=WCa_pHDIC)) +
  ggtitle("Larvae growth by larvae treatment")+ 
  ylab("Area Growth (um/day)")+
  theme_classic()

#get means and SE of larvae growth
meanareagrowth<- aggregate(AreaGrowthPerDay~ParentTrt*JarTrt, data=LarvaeDat, FUN=mean)
seareagrowth<- aggregate(AreaGrowthPerDay~ParentTrt*JarTrt, data=LarvaeDat, FUN=se)

#use a bargraph to plot the mean Larvae area +/- SE
ggplot(data = meanareagrowth, aes(x = ParentTrt, y = AreaGrowthPerDay, fill=JarTrt)) +
  geom_bar(stat="identity", aes(fill=JarTrt), position="dodge")+
  labs(x = "Parent Treatment", y = "Larvae Growth (um2/day)") +
  scale_fill_manual(name = "Larvae Treatment", labels = c("Control", "OA Exposed"), values = c("skyblue2", "red2")) +
  geom_errorbar(aes(ymin= meanareagrowth$AreaGrowthPerDay-seareagrowth$AreaGrowthPerDay, ymax=meanareagrowth$AreaGrowthPerDay+seareagrowth$AreaGrowthPerDay),width=0.2, position=position_dodge(.9))+
  theme_classic()

#now plot the difference between parental treatments for larvae growth
areagrowthdiff <- data.frame(tapply(LarvaeDat$AreaGrowthPerDay, list(LarvaeDat$CrossID, LarvaeDat$JarTrt), mean)) %>%
  rownames_to_column("CrossID") %>%
  mutate(CrossTemp = CrossID) %>%
  mutate(ParentTrt = ifelse(startsWith(CrossID, "E"), "Exposed", "Control")) %>%
  separate(CrossTemp, c("FemaleID", "MaleID", "BlockID", "Fourth"), sep="_") %>% select(-Fourth) %>%
  mutate(Diff = Exposed - Control)
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1 rows [1].
areagrowthdiff$CrossID<- as.factor(areagrowthdiff$CrossID)
areagrowthdiff$FemaleID<-as.factor(areagrowthdiff$FemaleID)
areagrowthdiff$MaleID<-as.factor(areagrowthdiff$MaleID)

#look at cross differences by Male and Female ID
ggplot(data = areagrowthdiff, aes(x = MaleID, y = Diff)) + 
  geom_point(aes(colour=FemaleID)) + 
  labs(x = "Male ID", y = "Change in larvae growth area (um2/day) per family\nfrom control to OA conditions") + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), panel.background = element_blank(), 
        axis.line = element_line(colour = "black"), panel.border=element_rect(colour="black", fill=NA))+
   theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
## Warning: Removed 69 rows containing missing values (geom_point).

#get the means of the differences to visualize overall differences
meanardiff<- aggregate(Diff~ParentTrt, data=areagrowthdiff, FUN=mean)
seardiff<-  aggregate(Diff~ParentTrt, data=areagrowthdiff, FUN=se)

ggplot(data = meanardiff, aes(x = ParentTrt, y = Diff)) + 
  geom_bar(stat="identity", aes(fill="gray45"), position="dodge") + 
  labs(x = "Parental Treatment", y = "Change in larvae growth area (um2) per family\nfrom control to OA conditions") + 
  scale_x_discrete(breaks = c("Control", "Exposed"), labels = c("Control", "OA")) + 
  scale_fill_manual(values = "gray45") + 
  geom_errorbar(aes(ymin= meanardiff$Diff-seardiff$Diff, ymax=meanardiff$Diff+seardiff$Diff),width=0.2, position=position_dodge(.9))+
  theme(legend.position = "none", panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), panel.background = element_blank(), 
        axis.line = element_line(colour = "black"))

diffarttest<- t.test(Diff~ParentTrt, data=areagrowthdiff)
diffarttest #significant effect of parent treatment on growth. 
## 
##  Welch Two Sample t-test
## 
## data:  Diff by ParentTrt
## t = -1.7272, df = 39.691, p-value = 0.09192
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -56.179320   4.412162
## sample estimates:
## mean in group Control mean in group Exposed 
##             -364.4143             -338.5307

Characterize the larvae by the ratio of major and minor axis

#make a column for the ratio of major to minor axis
LarvaeDat$MajMinRat<- LarvaeDat$LarvaeMajorAxisLengthPix/LarvaeDat$LarvaeMinorAxisLengthPix

#get the mean ratio for the treatments to plot
meanrat<- ddply(LarvaeDat, .(JarTrt, ParentTrt), numcolwise(mean, na.rm=T))
serat<- ddply(LarvaeDat, .(JarTrt, ParentTrt), numcolwise(se, na.rm=T))

ggplot(data = meanrat, aes(x = ParentTrt, y = MajMinRat, fill=JarTrt)) +
  geom_bar(stat="identity", aes(fill=JarTrt), position="dodge")+
  labs(x = "Parent Treatment", y = "Ratio of Major to Minor Axis") +
  scale_fill_manual(name = "Larvae Treatment", labels = c("Control", "OA Exposed"), values = c("skyblue2", "red2")) +
  geom_errorbar(aes(ymin= meanrat$MajMinRat-serat$MajMinRat, ymax=meanrat$MajMinRat+serat$MajMinRat),width=0.2, position=position_dodge(.9))+
  theme_classic()

#quick test to see if there is a significant difference
larvrat<-lmer(MajMinRat~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|JarSeatable)+(1|JarID) , data=LarvaeDat)

lmrat1<- lm(MajMinRat~WCa_pHDIC*JarOmegaCalcite, data=LarvaeDat)
plot(resid(lmrat1)~LarvaeDat$FemaleID)

#check the model to see that it meets assumptions
plot(larvrat) #seems to meet assumption of linearity and homoscedasticity

qqnorm(resid(larvrat))
qqline(resid(larvrat)) #has relatively long tails, doesn't seem to meet assumption of normality. 

summary(larvrat) #no significant differences, but interaction between parent treatment and larval treatment are both nearly significant
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: MajMinRat ~ WCa_pHDIC * JarOmegaCalcite + (1 | FemaleID) + (1 |  
##     MaleID) + (1 | JarSeatable) + (1 | JarID)
##    Data: LarvaeDat
## 
## REML criterion at convergence: -12932.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7873 -0.5941 -0.0385  0.5528  4.5524 
## 
## Random effects:
##  Groups      Name        Variance  Std.Dev.
##  JarID       (Intercept) 1.269e-04 0.011266
##  FemaleID    (Intercept) 8.012e-05 0.008951
##  MaleID      (Intercept) 9.268e-06 0.003044
##  JarSeatable (Intercept) 1.335e-06 0.001155
##  Residual                1.736e-03 0.041667
## Number of obs: 3758, groups:  
## JarID, 377; FemaleID, 28; MaleID, 24; JarSeatable, 3
## 
## Fixed effects:
##                             Estimate Std. Error         df t value
## (Intercept)                 1.173838   0.005218  55.800000 224.959
## WCa_pHDIC                  -0.010436   0.005719  59.000000  -1.825
## JarOmegaCalcite            -0.008702   0.005459 333.000000  -1.594
## WCa_pHDIC:JarOmegaCalcite   0.011422   0.005971 331.900000   1.913
##                           Pr(>|t|)    
## (Intercept)                 <2e-16 ***
## WCa_pHDIC                   0.0731 .  
## JarOmegaCalcite             0.1119    
## WCa_pHDIC:JarOmegaCalcite   0.0566 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) WC_pHDIC JrOmgC
## WCa_pHDIC   -0.870                
## JarOmegClct -0.572  0.507         
## WC_HDIC:JOC  0.509 -0.571   -0.892
#look at autocorrelation. 
acf(resid(larvrat)) #this looks fine to me.

Look at eccentricity

#look at the eccentricity data
hist(LarvaeDat$LarvaeEccentricity)#data are left skewed. 

#get the mean eccentricity for the treatments to plot
meaneccen<- aggregate(LarvaeEccentricity~ParentTrt*JarTrt, data=LarvaeDat, FUN=mean)
seeccen<- aggregate(LarvaeEccentricity~ParentTrt*JarTrt, data=LarvaeDat, FUN=se)

ggplot(data = meaneccen, aes(x = ParentTrt, y = LarvaeEccentricity, fill=JarTrt)) +
  geom_bar(stat="identity", aes(fill=JarTrt), position="dodge")+
  labs(x = "Parent Treatment", y = "Larvae Eccentricity") +
  scale_fill_manual(name = "Larvae Treatment", labels = c("Control", "OA"), values = c("skyblue2", "red2")) +
  geom_errorbar(aes(ymin= meaneccen$LarvaeEccentricity-seeccen$LarvaeEccentricity, ymax=meaneccen$LarvaeEccentricity+seeccen$LarvaeEccentricity),width=0.2, position=position_dodge(.9))+
  theme_classic()

#quick test to see if there is a significant difference
larveccen<-lmer(LarvaeEccentricity~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID)+(1|TrtTankID)+(1|AccTankID)+(1|Block) , data=LarvaeDat)

steppedlarveccen<- step(larveccen)
print(steppedlarveccen)
## 
## Random effects:
##             Chi.sq Chi.DF elim.num p.value
## TrtTankID     0.03      1        1  0.8571
## CrossID       0.13      1        2  0.7218
## JarSeatable   0.20      1        3  0.6543
## AccTankID     0.54      1        4  0.4628
## MaleID        0.56      1        5  0.4531
## Block         2.50      1        6  0.1138
## FemaleID     34.33      1     kept  <1e-07
## JarID        76.42      1     kept  <1e-07
## 
## Fixed effects:
##                           Sum Sq Mean Sq NumDF  DenDF F.value elim.num
## WCa_pHDIC:JarOmegaCalcite 0.0093  0.0093     1 345.60  3.0753        1
## WCa_pHDIC                 0.0028  0.0028     1  25.35  0.9415        2
## JarOmegaCalcite           0.0152  0.0152     1 345.45  5.0440     kept
##                           Pr(>F)
## WCa_pHDIC:JarOmegaCalcite 0.0804
## WCa_pHDIC                 0.3411
## JarOmegaCalcite           0.0253
## 
## Least squares means:
##      Estimate Standard Error DF t-value Lower CI Upper CI p-value
## 
##  Differences of LSMEANS:
##      Estimate Standard Error DF t-value Lower CI Upper CI p-value
## 
## Final model:
## lme4::lmer(formula = LarvaeEccentricity ~ JarOmegaCalcite + (1 | 
##     FemaleID) + (1 | JarID), data = LarvaeDat)
#final model only had JarTrt
larveccenfin<- lmer(LarvaeEccentricity~JarOmegaCalcite+ (1|FemaleID)+(1|JarID), data=LarvaeDat)

#check the model to see that it meets assumptions
plot(larveccenfin) #seems to meet assumption of linearity and homoscedasticity

qqnorm(resid(larveccenfin))
qqline(resid(larveccenfin)) #has relatively long tails, doesn't meet assumption of normality. 

summary(larveccenfin) #significant effect of JarTrt
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: LarvaeEccentricity ~ JarOmegaCalcite + (1 | FemaleID) + (1 |  
##     JarID)
##    Data: LarvaeDat
## 
## REML criterion at convergence: -10830.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.7095 -0.4842  0.0492  0.5902  3.4868 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev.
##  JarID    (Intercept) 0.0002857 0.01690 
##  FemaleID (Intercept) 0.0001422 0.01192 
##  Residual             0.0030231 0.05498 
## Number of obs: 3758, groups:  JarID, 377; FemaleID, 28
## 
## Fixed effects:
##                  Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)     5.027e-01  3.262e-03 6.060e+01 154.110   <2e-16 ***
## JarOmegaCalcite 7.725e-03  3.440e-03 3.455e+02   2.246   0.0253 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## JarOmegClct -0.596
#look at autocorrelation. 
acf(resid(larveccenfin)) #this looks fine to me.

#see if a square root transformation helps
larveccent<-lmer((LarvaeEccentricity^2)~JarOmegaCalcite+ (1|FemaleID)+(1|JarID) , data=LarvaeDat)

#check the model to see that it meets assumptions
plot(larveccent) #seems to meet assumption of linearity and homoscedasticity

qqnorm(resid(larveccent))
qqline(resid(larveccent)) #has relatively long tails, doesn't meet assumption of normality. transformation helped

summary(larveccent) #no significant differences 
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: (LarvaeEccentricity^2) ~ JarOmegaCalcite + (1 | FemaleID) + (1 |  
##     JarID)
##    Data: LarvaeDat
## 
## REML criterion at convergence: -11156.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.4618 -0.5548  0.0090  0.5923  3.8526 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev.
##  JarID    (Intercept) 0.0002415 0.01554 
##  FemaleID (Intercept) 0.0001314 0.01146 
##  Residual             0.0027820 0.05274 
## Number of obs: 3758, groups:  JarID, 377; FemaleID, 28
## 
## Fixed effects:
##                  Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)     2.586e-01  3.101e-03 5.960e+01   83.40   <2e-16 ***
## JarOmegaCalcite 3.754e-03  3.235e-03 3.472e+02    1.16    0.247    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## JarOmegClct -0.589

Look at larvae perimeter

hist(LarvaeDat$LarvaePerimeterum)

#test to see if there is a significant difference
larvperim<-lmer(LarvaePerimeterum~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID)+(1|TrtTankID)+(1|AccTankID)+(1|Block) , data=LarvaeDat)

steppedlarvperim<- step(larvperim)
print(steppedlarvperim)
## 
## Random effects:
##             Chi.sq Chi.DF elim.num p.value
## TrtTankID     0.00      1        1  1.0000
## AccTankID     0.00      1        2  1.0000
## Block         0.00      1        3  1.0000
## FemaleID     24.69      1     kept       0
## MaleID       30.84      1     kept  <1e-07
## CrossID       2.82      1     kept  0.0931
## JarSeatable   4.94      1     kept  0.0263
## JarID       555.87      1     kept  <1e-07
## 
## Fixed effects:
##                               Sum Sq    Mean Sq NumDF  DenDF  F.value
## WCa_pHDIC                    13.3777    13.3777     1  34.45   0.1552
## JarOmegaCalcite           26016.3238 26016.3238     1 274.46 301.8615
## WCa_pHDIC:JarOmegaCalcite   341.4119   341.4119     1 274.81   3.9613
##                           elim.num Pr(>F)
## WCa_pHDIC                     kept 0.6960
## JarOmegaCalcite               kept <1e-07
## WCa_pHDIC:JarOmegaCalcite     kept 0.0475
## 
## Least squares means:
##      Estimate Standard Error DF t-value Lower CI Upper CI p-value
## 
##  Differences of LSMEANS:
##      Estimate Standard Error DF t-value Lower CI Upper CI p-value
## 
## Final model:
## lme4::lmer(formula = LarvaePerimeterum ~ WCa_pHDIC + JarOmegaCalcite + 
##     (1 | FemaleID) + (1 | MaleID) + (1 | CrossID) + (1 | JarSeatable) + 
##     (1 | JarID) + WCa_pHDIC:JarOmegaCalcite, data = LarvaeDat)
#final model chosen by the step function: LarvaePerimeterum~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID)
larvperimfin<- lmer(LarvaePerimeterum~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID), data=LarvaeDat)

#check the model to see that it meets assumptions
plot(larvperimfin) #seems to meet assumption of linearity and homoscedasticity

qqnorm(resid(larvperimfin))
qqline(resid(larvperimfin)) #has relatively long tails, doesn't meet assumption of normality. 

summary(larvperimfin)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: 
## LarvaePerimeterum ~ WCa_pHDIC * JarOmegaCalcite + (1 | FemaleID) +  
##     (1 | MaleID) + (1 | CrossID) + (1 | JarSeatable) + (1 | JarID)
##    Data: LarvaeDat
## 
## REML criterion at convergence: 28176.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8942 -0.5378  0.0467  0.5950  4.8692 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  JarID       (Intercept) 37.253   6.104   
##  CrossID     (Intercept)  6.961   2.638   
##  FemaleID    (Intercept) 51.519   7.178   
##  MaleID      (Intercept) 55.727   7.465   
##  JarSeatable (Intercept)  1.543   1.242   
##  Residual                86.186   9.284   
## Number of obs: 3758, groups:  
## JarID, 377; CrossID, 81; FemaleID, 28; MaleID, 24; JarSeatable, 3
## 
## Fixed effects:
##                           Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)                193.856      4.642  33.250  41.758   <2e-16 ***
## WCa_pHDIC                   -1.938      4.919  34.450  -0.394   0.6960    
## JarOmegaCalcite             37.418      2.154 274.460  17.374   <2e-16 ***
## WCa_pHDIC:JarOmegaCalcite    4.687      2.355 274.810   1.990   0.0475 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) WC_pHDIC JrOmgC
## WCa_pHDIC   -0.865                
## JarOmegClct -0.252  0.229         
## WC_HDIC:JOC  0.222 -0.254   -0.893
#look at autocorrelation. 
acf(resid(larvperimfin)) #this looks fine to me.

ggplot(LarvaeDat,
       aes(fill = WCa_pHDIC, y = LarvaePerimeterum, x = JarOmegaCalcite)) +
  geom_point(aes(shape= Block, colour=WCa_pHDIC)) +
  ggtitle("Larvae area by larvae treatment")+ 
  ylab("Larvae Perimeter (um)")+
  theme_classic()

meanperim<- ddply(LarvaeDat, .(JarTrt, ParentTrt), numcolwise(mean, na.rm=T))
seperim<- ddply(LarvaeDat, .(JarTrt, ParentTrt), numcolwise(se, na.rm=T))

ggplot(data = meanperim, aes(x = ParentTrt, y = LarvaePerimeterum, fill=JarTrt)) +
  geom_bar(stat="identity", aes(fill=JarTrt), position="dodge")+
  labs(x = "Parent Treatment", y = "Larvae Perimeter (um)") +
  scale_fill_manual(name = "Larvae Treatment", labels = c("Control", "OA Exposed"), values = c("skyblue2", "red2")) +
  geom_errorbar(aes(ymin= meanperim$LarvaePerimeterum-seperim$LarvaePerimeterum, ymax=meanperim$LarvaePerimeterum+seperim$LarvaePerimeterum),width=0.2, position=position_dodge(.9))+
  theme_classic()

#see if a square root transformation helps
larvperimfint<- lmer(sqrt(LarvaePerimeterum)~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID), data=LarvaeDat)

#check the model to see that it meets assumptions
plot(larvperimfint) #seems to meet assumption of linearity and homoscedasticity

qqnorm(resid(larvperimfint))
qqline(resid(larvperimfint)) #has relatively long tails, doesn't meet assumption of normality. #transformation didn't help

summary(larvperimfint) #significant effect of JarOmegaCalcite and signficant interaction
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: sqrt(LarvaePerimeterum) ~ WCa_pHDIC * JarOmegaCalcite + (1 |  
##     FemaleID) + (1 | MaleID) + (1 | CrossID) + (1 | JarSeatable) +  
##     (1 | JarID)
##    Data: LarvaeDat
## 
## REML criterion at convergence: 2943.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8139 -0.5169  0.0547  0.5830  4.7654 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  JarID       (Intercept) 0.045689 0.21375 
##  CrossID     (Intercept) 0.007704 0.08778 
##  FemaleID    (Intercept) 0.063435 0.25186 
##  MaleID      (Intercept) 0.069253 0.26316 
##  JarSeatable (Intercept) 0.001854 0.04306 
##  Residual                0.103673 0.32198 
## Number of obs: 3758, groups:  
## JarID, 377; CrossID, 81; FemaleID, 28; MaleID, 24; JarSeatable, 3
## 
## Fixed effects:
##                            Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                13.93075    0.16302  33.25000  85.452   <2e-16
## WCa_pHDIC                  -0.06646    0.17270  34.46000  -0.385    0.703
## JarOmegaCalcite             1.26126    0.07529 272.81000  16.753   <2e-16
## WCa_pHDIC:JarOmegaCalcite   0.16579    0.08233 273.20000   2.014    0.045
##                              
## (Intercept)               ***
## WCa_pHDIC                    
## JarOmegaCalcite           ***
## WCa_pHDIC:JarOmegaCalcite *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) WC_pHDIC JrOmgC
## WCa_pHDIC   -0.865                
## JarOmegClct -0.251  0.228         
## WC_HDIC:JOC  0.221 -0.253   -0.893

Try different type of visualization: Make plot that looks at morphology by larvae and parent treatment

par(mar = c(5, 4, 0.5, 0.5), bg = "transparent")
boxplot(LarvByJarDat$LarvaeDiamum ~ LarvByJarDat$JarTrt * LarvByJarDat$ParentTrt, 
        xlim = c(0, 5),
        xlab = "Larvae Treatment",
        ylab = "Diameter (um)",
        ylim = c(0,85),
        names = c("Control", "OA Exposed", "Control", "OA Exposed"), 
        col = c("powderblue", "red"))
rect(xleft = -0.2, 
     ybottom = 0, 
     xright = 2.5, 
     ytop = 85,
     border = FALSE,
     col = rgb(0, 0, 0.5, alpha = 0.1))
rect(xleft = 2.5, 
     ybottom = 0, 
     xright = 5.2, 
     ytop = 85,
     border = FALSE,
     col = rgb(0.5, 0, 0, alpha = 0.1))
text(x = 1.25, y = 84, "Control Parents")
text(x = 3.75, y = 84, "OA Exposed Parents")
boxplot(LarvByJarDat$LarvaeDiamum ~ LarvByJarDat$JarTrt * LarvByJarDat$ParentTrt, 
        xlim = c(0, 5),
        xlab = "Larvae Treatment",
        ylab = "Diameter (um)",
        names = c("Control", "OA Exposed", "Control", "OA Exposed"), 
        col = c("powderblue", "red"),
        add = TRUE)

par(mfcol=c(1,1))

Try to analyze survival data

boxplot(PropSurvived~ParentTrt*JarTrt, data=LarvByJarDat)

boxplot(PropSurvived~CrossID*JarTrt, data=LarvByJarDat)

#I don't know what we want to do about the larvae with proportion survived greater than one. For now, I will remove them. 
LarvByJarDatSurv<- subset(LarvByJarDat, PropSurvived<=1)
boxplot(PropSurvived~ParentTrt*JarTrt, data=LarvByJarDatSurv)

#create linear mixed model
surv1<- lmer(PropSurvived~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|TrtTankID)+(1|AccTankID), data=LarvByJarDatSurv)

survstep<- step(surv1)
print(survstep)
## 
## Random effects:
##             Chi.sq Chi.DF elim.num p.value
## AccTankID     0.01      1        1  0.9259
## FemaleID      0.07      1        2  0.7923
## TrtTankID     1.20      1        3  0.2734
## MaleID        1.13      1        4  0.2875
## CrossID     249.06      1     kept  <1e-07
## JarSeatable   5.64      1     kept  0.0175
## 
## Fixed effects:
##                           Sum Sq Mean Sq NumDF  DenDF F.value elim.num
## WCa_pHDIC:JarOmegaCalcite 0.0001  0.0001     1 190.04  0.0103        1
## WCa_pHDIC                 0.0069  0.0069     1  38.00  1.2401        2
## JarOmegaCalcite           0.2660  0.2660     1 191.19 48.0394     kept
##                           Pr(>F)
## WCa_pHDIC:JarOmegaCalcite 0.9192
## WCa_pHDIC                 0.2724
## JarOmegaCalcite           <1e-07
## 
## Least squares means:
##      Estimate Standard Error DF t-value Lower CI Upper CI p-value
## 
##  Differences of LSMEANS:
##      Estimate Standard Error DF t-value Lower CI Upper CI p-value
## 
## Final model:
## lme4::lmer(formula = PropSurvived ~ JarOmegaCalcite + (1 | CrossID) + 
##     (1 | JarSeatable), data = LarvByJarDatSurv)
#final model chosen by step function: PropSurvived~JarOmegaCalcite+ (1|CrossID)+(1|JarSeatable)
survfin<- lmer(PropSurvived~JarOmegaCalcite+(1|CrossID)+(1|JarSeatable), data=LarvByJarDatSurv)

plot(survfin) #this looks fine to me

qqnorm(resid(survfin))
qqline(resid(survfin)) #I think this is fine

acf(survfin)
## Error in complete.cases(object): invalid 'type' (S4) of argument
summary(survfin)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: 
## PropSurvived ~ JarOmegaCalcite + (1 | CrossID) + (1 | JarSeatable)
##    Data: LarvByJarDatSurv
## 
## REML criterion at convergence: -406.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.7631 -0.4894  0.0348  0.5565  2.3172 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  CrossID     (Intercept) 0.026348 0.16232 
##  JarSeatable (Intercept) 0.000337 0.01836 
##  Residual                0.005536 0.07441 
## Number of obs: 236, groups:  CrossID, 41; JarSeatable, 3
## 
## Fixed effects:
##                  Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)       0.54840    0.02897  33.45000  18.932  < 2e-16 ***
## JarOmegaCalcite   0.07939    0.01145 191.19000   6.931 6.23e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## JarOmegClct -0.263
#Jar treatment significantly affects survival

ggplot(LarvByJarDatSurv,
       aes(fill = WCa_pHDIC, y = PropSurvived, x = JarOmegaCalcite)) +
  geom_point(aes(colour=WCa_pHDIC)) +
  ggtitle("Larvae survival by larvae treatment")+ 
  ylab("Proportion of Larvae that Survived")+
  theme_classic()

meansurv<- ddply(LarvByJarDatSurv, .(JarTrt, CrossID), numcolwise(mean, na.rm=T))
sesurv<- ddply(LarvByJarDatSurv, .(JarTrt, CrossID), numcolwise(se, na.rm=T))
#make a shorter name for the CrossIDs
meansurv$Cross<- substr(meansurv$CrossID, 1, 9)
sesurv$Cross<- substr(sesurv$CrossID, 1, 9)

ggplot(data = meansurv, aes(x = Cross, y = PropSurvived, fill=JarTrt)) +
  geom_bar(stat="identity", aes(fill=JarTrt), position="dodge")+
  labs(x = "Cross ID", y = "Proportion of Larvae that Survived") +
  scale_fill_manual(name = "Larvae Treatment", labels = c("Control", "OA Exposed"), values = c("skyblue2", "red2")) +
  geom_errorbar(aes(ymin= meansurv$PropSurvived-sesurv$PropSurvived, ymax=meansurv$PropSurvived+sesurv$PropSurvived),width=0.2, position=position_dodge(.9))+
  theme(axis.text.x=element_text(angle=90))
## Warning: Removed 3 rows containing missing values (geom_errorbar).

#EF03_EM05 only has control because the exposed all had survival >1. 

#now just plot the JarTrt
meanjarsurv<- ddply(LarvByJarDatSurv, .(JarTrt), numcolwise(mean, na.rm=T))
sejarsurv<- ddply(LarvByJarDatSurv, .(JarTrt), numcolwise(se, na.rm=T))

ggplot(data = meanjarsurv, aes(x = JarTrt, y = PropSurvived)) +
  geom_bar(stat="identity", aes(fill="gray45"), position="dodge")+
  labs(x = "Jar Treatment", y = "Proportion of Larvae that Survived") +
  scale_fill_manual(values = "gray45") +
  geom_errorbar(aes(ymin= meanjarsurv$PropSurvived-sejarsurv$PropSurvived, ymax=meanjarsurv$PropSurvived+sejarsurv$PropSurvived),width=0.2, position=position_dodge(.9))+
  theme(legend.position = "none", panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), panel.background = element_blank(), 
        axis.line = element_line(colour = "black"))

#try to use the cross difference code to look at the difference in survival.
survdiff <- data.frame(tapply(LarvByJarDat$PropSurvived, list(LarvByJarDat$CrossID, LarvByJarDat$JarTrt), mean)) %>%
  rownames_to_column("CrossID") %>%
  mutate(CrossTemp = CrossID) %>%
  mutate(ParentTrt = ifelse(startsWith(CrossID, "E"), "Exposed", "Control")) %>%
  separate(CrossTemp, c("FemaleID", "MaleID", "BlockID", "Fourth"), sep="_") %>% select(-Fourth) %>%
  mutate(Diff = Exposed - Control)
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1 rows [1].
survdiff$CrossID<- as.factor(survdiff$CrossID)
survdiff$FemaleID<-as.factor(survdiff$FemaleID)
survdiff$MaleID<-as.factor(survdiff$MaleID)

#look at cross differences by Male and Female ID
ggplot(data = survdiff, aes(x = FemaleID, y = Diff)) + 
  geom_point(aes(colour=MaleID)) + 
  labs(x = "Female ID", y = "Change in larvae survival per family\nfrom control to OA conditions") + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), panel.background = element_blank(), 
        axis.line = element_line(colour = "black"), panel.border=element_rect(colour="black", fill=NA))+
   theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
## Warning: Removed 70 rows containing missing values (geom_point).

#get the means of the differences to visualize overall differences
meansurvdiff<- ddply(survdiff, .(ParentTrt), numcolwise(mean, na.rm=T))
sesurvdiff<- ddply(survdiff, .(ParentTrt), numcolwise(se, na.rm=T))

ggplot(data = meansurvdiff, aes(x = ParentTrt, y = Diff)) + 
  geom_bar(stat="identity", aes(fill="gray45"), position="dodge") + 
  labs(x = "Parental Treatment", y = "Change in larvae survival per family\nfrom control to OA conditions") + 
  scale_fill_manual(values = "gray45") + 
  geom_errorbar(aes(ymin= meansurvdiff$Diff-sesurvdiff$Diff, ymax=meansurvdiff$Diff+sesurvdiff$Diff),width=0.2, position=position_dodge(.9))+
  theme(legend.position = "none", panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), panel.background = element_blank(), 
        axis.line = element_line(colour = "black"))

diffsurvttest<- aov(Diff~ParentTrt, data=survdiff)
anova(diffsurvttest) #no significant effect of parental treatment
## Analysis of Variance Table
## 
## Response: Diff
##           Df  Sum Sq  Mean Sq F value Pr(>F)
## ParentTrt  1 0.01155 0.011555  0.7309 0.3974
## Residuals 42 0.66397 0.015809

Explore cilia extrusion.

#Plot cilia extrusion by size to see if there is a relationship (see Waldbusser et al 2015)
#to do this Elise looked at a subset of Larvae for each jar treatment. 
par(mfcol=c(1,1))
testcil<- read.csv("../data/LarvMorphTest.csv")
testcil$CilExtruded<- as.factor(testcil$CilExtruded)
testcilsub<- subset(testcil, CilExtruded !="")
testcilsub$Flag<- as.factor(testcilsub$Flag)
testcilsub<- subset(testcilsub, Flag !="TRUE")
#take a look at the data
ggplot(testcilsub,
       aes(fill = JarTrt, y = MaxFeretDiamum, x = CilExtruded)) +
  geom_point(aes( shape=JarTrt, colour=JarTrt)) +
  ggtitle("Larvae cilia extrusion by size")+ 
  ylab("Larvae Diameter (um)")+
  facet_grid(~JarTrt)+
  theme_classic()

#test for a relationship between size and cilia extruded within each jar treatment type. 
exp<- subset(testcilsub, JarTrt=="Exposed")
con<- subset(testcilsub, JarTrt=="Control")

expcil1<-glm(CilExtruded~MaxFeretDiamum, data=exp, family=binomial)
summary(expcil1) #no significant relationship between size and cilia extrusion
## 
## Call:
## glm(formula = CilExtruded ~ MaxFeretDiamum, family = binomial, 
##     data = exp)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7172   0.7201   0.7444   0.7527   0.7725  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)
## (Intercept)    0.566735   3.046585   0.186    0.852
## MaxFeretDiamum 0.008725   0.047015   0.186    0.853
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 182.22  on 163  degrees of freedom
## Residual deviance: 182.18  on 162  degrees of freedom
## AIC: 186.18
## 
## Number of Fisher Scoring iterations: 4
boxplot(MaxFeretDiamum~CilExtruded,  data=exp)

#now test the control jars for relationship between size and cilia extrusion. 
concil1<-glm(CilExtruded~MaxFeretDiamum, data=con, family=binomial)
summary(concil1) #looks like there is a significant relationship between size and cilia extrusion
## 
## Call:
## glm(formula = CilExtruded ~ MaxFeretDiamum, family = binomial, 
##     data = con)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9324  -0.3705  -0.3098  -0.2538   2.8520  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)  
## (Intercept)     14.8599     7.5190   1.976   0.0481 *
## MaxFeretDiamum  -0.2376     0.1026  -2.316   0.0206 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 82.805  on 179  degrees of freedom
## Residual deviance: 77.639  on 178  degrees of freedom
## AIC: 81.639
## 
## Number of Fisher Scoring iterations: 6
boxplot(MaxFeretDiamum~CilExtruded,  data=con)

Export Figures

plots.dir.path<- list.files(tempdir(), pattern="rs-graphics", full.names= TRUE)
plots.png.paths<- list.files(plots.dir.path, pattern=".png", full.names=TRUE)
file.copy(from=plots.png.paths, to="../results")
## logical(0)

From data exploration:

lm1<- lm(LarvaeDiamum~ParentTrt*JarTrt+EggDiamum, data=LarvaeDat)
par(mfcol=c(2,2))
plot(lm1) #does not appear to meet assumptions of normality

par(mfcol=c(1,1))

#plot the residuals of lm1 vs. Female and Male ID
plot(resid(lm1)~LarvaeDat$FemaleID)
abline(0,0)

plot(resid(lm1)~LarvaeDat$MaleID)
abline(0,0)

#look at autocorrelation of lm1; elise is still struggling with this

#add female ID to the model as a random effect, but take out EggDiamum then plot the residuals against MaleID
lm1b<- lmer(LarvaeDiamum~ParentTrt*JarTrt+ (1|FemaleID), data=LarvaeDat)
summary(lm1b)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: LarvaeDiamum ~ ParentTrt * JarTrt + (1 | FemaleID)
##    Data: LarvaeDat
## 
## REML criterion at convergence: 21158.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.4403 -0.5024  0.0615  0.6160  4.0917 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  FemaleID (Intercept)  6.925   2.632   
##  Residual             15.858   3.982   
## Number of obs: 3758, groups:  FemaleID, 28
## 
## Fixed effects:
##                              Estimate Std. Error        df t value
## (Intercept)                   76.5288     0.7521   28.0000 101.748
## ParentTrt2800                 -0.2925     1.0324   28.0000  -0.283
## JarTrtExposed                -10.9849     0.2096 3751.0000 -52.420
## ParentTrt2800:JarTrtExposed    1.1578     0.3066 3753.0000   3.776
##                             Pr(>|t|)    
## (Intercept)                  < 2e-16 ***
## ParentTrt2800               0.779016    
## JarTrtExposed                < 2e-16 ***
## ParentTrt2800:JarTrtExposed 0.000162 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) PrT2800 JrTrtE
## PrntTrt2800 -0.729               
## JarTrtExpsd -0.193  0.141        
## PrT2800:JTE  0.132 -0.217  -0.683
plot(lm1b) #this looks okay, seems to meet assumption of linearity and homoscedasticity

qqnorm(resid(lm1b))
qqline(resid(lm1b)) #doesn't look like it meets the assumption of normality

#plot the residuals of lm1b vs. Male ID
plot(resid(lm1b)~LarvaeDat$MaleID)
abline(0,0)

#I think we probably want to include the male ID in the model

lm1c<- lmer(LarvaeDiamum~ParentTrt*JarTrt+ (1|FemaleID)+ (1|MaleID), data=LarvaeDat)
summary(lm1c)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: LarvaeDiamum ~ ParentTrt * JarTrt + (1 | FemaleID) + (1 | MaleID)
##    Data: LarvaeDat
## 
## REML criterion at convergence: 20435.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.1926 -0.5092  0.0507  0.5987  4.5559 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  FemaleID (Intercept)  8.920   2.987   
##  MaleID   (Intercept)  8.831   2.972   
##  Residual             12.751   3.571   
## Number of obs: 3758, groups:  FemaleID, 28; MaleID, 24
## 
## Fixed effects:
##                              Estimate Std. Error        df t value
## (Intercept)                   76.7310     1.2200   41.0000  62.893
## ParentTrt2800                 -0.6219     1.7100   40.0000  -0.364
## JarTrtExposed                -11.0094     0.1883 3714.0000 -58.475
## ParentTrt2800:JarTrtExposed    1.1690     0.2755 3716.0000   4.243
##                             Pr(>|t|)    
## (Intercept)                  < 2e-16 ***
## ParentTrt2800                  0.718    
## JarTrtExposed                < 2e-16 ***
## ParentTrt2800:JarTrtExposed 2.26e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) PrT2800 JrTrtE
## PrntTrt2800 -0.713               
## JarTrtExpsd -0.116  0.083        
## PrT2800:JTE  0.079 -0.120  -0.683
plot(lm1c) #this looks okay, seems to meet assumption of linearity and homoscedasticity

qqnorm(resid(lm1c))
qqline(resid(lm1c)) #doesn't look like it meets the assumption of normality

#plot the residuals of lm1c vs. TimeFiltered
plot(resid(lm1c)~LarvaeDat$TimeFilter)
abline(0,0)

Move on to next linear model to at least get the code down

lm2<-lmer(LarvaeDiamum~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID), data=LarvaeDat)
#check assumptions for lm2
#Assumption 1: linearity: 
plot(resid(lm2)~LarvaeDat$ParentTrt)

plot(resid(lm2)~LarvaeDat$JarTrt) 

#seem to meet assumption of linearity

#Assumption 2: Homoscedasticity
plot(lm2)# seems to meet assumption of homoscedasticity but check with Levene's test: 

leveneTest(resid(lm2)~LarvaeDat$ParentTrt*LarvaeDat$JarTrt, center=mean) #p<0.05 so does not meet assumption of homoscedasticity
## Levene's Test for Homogeneity of Variance (center = mean)
##         Df F value    Pr(>F)    
## group    3  48.486 < 2.2e-16 ***
##       3754                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Assumption 3: residuals are normally distributed
qqnorm(resid(lm2))
qqline(resid(lm2)) #does not meet assumption of normality

#will need to try to transform the data. I have been using the Larvae dataframe, not the LarbyJar dataframe. Is that the right call? 

lm2t<-lmer(log(LarvaeDiamum)~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID), data=LarvaeDat)
#check assumptions for lm2
#Assumption 1: linearity: 
plot(resid(lm2t)~LarvaeDat$ParentTrt)

plot(resid(lm2t)~LarvaeDat$JarTrt) 

#seem to meet assumption of linearity

#Assumption 2: Homoscedasticity
plot(lm2t)# seems to meet assumption of homoscedasticity but check with Levene's test: 

leveneTest(resid(lm2t)~LarvaeDat$ParentTrt*LarvaeDat$JarTrt, center=mean) #p<0.05 still does not meet assumption of homoscedasticity
## Levene's Test for Homogeneity of Variance (center = mean)
##         Df F value    Pr(>F)    
## group    3  76.191 < 2.2e-16 ***
##       3754                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Assumption 3: residuals are normally distributed
qqnorm(resid(lm2t))
qqline(resid(lm2t)) #still does not meet assumption of normality

#currently does not meet assumptions, but still write the code for looking at residuals for lm2 
plot(resid(lm2t)~LarvaeDat$CrossID) # I don't see anything that would suggest we should account for this. 
abline(0,0)

plot(resid(lm2t)~LarvaeDat$JarID) #some of the jars have unusually high or low resids, so will have to account for this. 
abline(0,0)

plot(resid(lm2t)~LarvaeDat$TimeFilter) #we may want to account for this
abline(0,0)

plot(resid(lm2t)~LarvaeDat$BlockID.x) #does not seem to need to be accounted for
## Error in (function (formula, data = NULL, subset = NULL, na.action = na.fail, : invalid type (NULL) for variable 'LarvaeDat$BlockID.x'
#make a new model that accounts for JarID and TimeFilter then check assumptions again
lm3 <- lmer(LarvaeDiamum~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID) + (1|JarID) + (1|TimeFilter), data=LarvaeDat)

#check assumptions for lm3
#Assumption 1: linearity: 
plot(resid(lm3)~LarvaeDat$ParentTrt)

plot(resid(lm3)~LarvaeDat$JarTrt) 

#seem to meet assumption of linearity

#Assumption 2: Homoscedasticity
plot(lm3)# seems to meet assumption of homoscedasticity but check with Levene's test: 

leveneTest(resid(lm3)~LarvaeDat$ParentTrt*LarvaeDat$JarTrt, center=mean) #p<0.05 still does not meet assumption of homoscedasticity
## Levene's Test for Homogeneity of Variance (center = mean)
##         Df F value    Pr(>F)    
## group    3  43.401 < 2.2e-16 ***
##       3754                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Assumption 3: residuals are normally distributed
qqnorm(resid(lm3))
qqline(resid(lm3)) #still does not meet assumption of normality

#try transforming the data again
lm3t <- lmer(log(LarvaeDiamum)~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID) + (1|JarID) + (1|TimeFilter), data=Larvae)

#check assumptions for lm3
#Assumption 1: linearity: 
plot(resid(lm3t)~Larvae$ParentTrt)

plot(resid(lm3t)~Larvae$JarTrt) 

#seem to meet assumption of linearity

#Assumption 2: Homoscedasticity
plot(lm3t)# seems to meet assumption of homoscedasticity but check with Levene's test: 

leveneTest(resid(lm3t)~Larvae$ParentTrt*Larvae$JarTrt, center=mean) #p<0.05 still does not meet assumption of homoscedasticity
## Levene's Test for Homogeneity of Variance (center = mean)
##         Df F value    Pr(>F)    
## group    3  73.343 < 2.2e-16 ***
##       3754                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Assumption 3: residuals are normally distributed
qqnorm(resid(lm3t))
qqline(resid(lm3t)) #still doesn't meet assumption of normality

Retry the above models but use the LarvByJar dataset

ggplot(LarvByJar,
       aes(fill = ParentTrt, y = LarvaeDiamum, x = EggDiamum)) +
  geom_point(aes( shape=JarTrt, colour=ParentTrt)) +
  ggtitle("Larvae diameter by egg diameter")+ 
  ylab("Larvae Diameter (um)")+
  facet_grid(~JarTrt)+
  theme_classic()
## Warning: Removed 115 rows containing missing values (geom_point).

#start with a simple linear model 
jar1<- lm(LarvaeDiamum~ParentTrt*JarTrt+EggDiamum, data=LarvByJar, na.action="na.exclude")
par(mfcol=c(2,2))
plot(jar1) #does not appear to meet assumptions of normality, try transforming

par(mfcol=c(1,1))

#plot the residuals of lm1 vs. Female and Male ID
plot(resid(jar1)~LarvByJar$FemaleID)
abline(0,0)

plot(resid(jar1)~LarvByJar$MaleID)
abline(0,0)

#we will need to account for female and male ID

jar2<-lmer(LarvaeDiamum~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID), data=LarvByJar, na.action = "na.exclude")
#check assumptions for lm2
#Assumption 1: linearity: 
plot(resid(jar2)~LarvByJar$ParentTrt)

plot(resid(jar2)~LarvByJar$JarTrt) 

#seem to meet assumption of linearity

#Assumption 2: Homoscedasticity
plot(jar2)# seems to meet assumption of homoscedasticity but check with Levene's test: 

leveneTest(resid(jar2)~LarvByJar$ParentTrt*LarvByJar$JarTrt, center=mean) #p<0.05 so does not meet assumption of homoscedasticity
## Levene's Test for Homogeneity of Variance (center = mean)
##        Df F value   Pr(>F)   
## group   3  4.5575 0.003765 **
##       373                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Assumption 3: residuals are normally distributed
qqnorm(resid(jar2))
qqline(resid(jar2)) #does not meet assumption of normality

#will need to try to transform the data. I have been using the Larvae dataframe, not the LarbyJar dataframe. Is that the right call? 

jar2t<-lmer(log(LarvaeDiamum)~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID), data=LarvByJar, na.action= "na.exclude")
#check assumptions for lm2
#Assumption 1: linearity: 
plot(resid(jar2t)~LarvByJar$ParentTrt)

plot(resid(jar2t)~LarvByJar$JarTrt) 

#seem to meet assumption of linearity

#Assumption 2: Homoscedasticity
plot(jar2t)# seems to meet assumption of homoscedasticity but check with Levene's test: 

leveneTest(resid(jar2t)~LarvByJar$ParentTrt*LarvByJar$JarTrt, center=mean) #p<0.05 still does not meet assumption of homoscedasticity
## Levene's Test for Homogeneity of Variance (center = mean)
##        Df F value    Pr(>F)    
## group   3  6.2957 0.0003553 ***
##       373                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Assumption 3: residuals are normally distributed
qqnorm(resid(jar2t))
qqline(resid(jar2t)) #still does not meet assumption of normality

#currently does not meet assumptions, but still write the code for looking at residuals for jar2 
plot(resid(jar2t)~LarvByJar$CrossID) # It seems that we will want to account for this. 
abline(0,0)

plot(resid(jar2t)~LarvByJar$TimeFilter) #we will want to account for this
abline(0,0)

plot(resid(jar2t)~LarvByJar$BlockID.x)#I think we will need to account for this
## Error in (function (formula, data = NULL, subset = NULL, na.action = na.fail, : invalid type (NULL) for variable 'LarvByJar$BlockID.x'
#make a new model that accounts for CrossID, TimeFilter, and block then check assumptions again
jar3 <- lmer(LarvaeDiamum~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID) + (1|CrossID) + (1|TimeFilter) +(1|BlockID.x), data=LarvByJar, na.action="na.exclude")
## Error in eval(predvars, data, env): object 'BlockID.x' not found
#check assumptions for lm3
#Assumption 1: linearity: 
plot(resid(jar3)~LarvByJar$ParentTrt)
## Error in resid(jar3): object 'jar3' not found
plot(resid(jar3)~LarvByJar$JarTrt) 
## Error in resid(jar3): object 'jar3' not found
#seem to meet assumption of linearity

#Assumption 2: Homoscedasticity
plot(jar3)# seems to meet assumption of homoscedasticity but check with Levene's test: 
## Error in plot(jar3): object 'jar3' not found
leveneTest(resid(jar3)~LarvByJar$ParentTrt*LarvByJar$JarTrt, center=mean) #p<0.05 does not meet assumption of homoscedasticity
## Error in resid(jar3): object 'jar3' not found
#Assumption 3: residuals are normally distributed
qqnorm(resid(jar3))
## Error in resid(jar3): object 'jar3' not found
qqline(resid(jar3)) #does not meet assumption of normality
## Error in resid(jar3): object 'jar3' not found

Subset the data to only have Block 1 to see if that might resolve some of the abnormality problems

B1<- subset(Larvae, BlockID.x =="B1")
## Error in eval(e, x, parent.frame()): object 'BlockID.x' not found
ggplot(B1,
       aes(fill = ParentTrt, y = LarvaeDiamum, x = EggDiamum)) +
  geom_point(aes( shape=JarTrt, colour=ParentTrt)) +
  ggtitle("Larvae diameter by egg diameter")+ 
  ylab("Larvae Diameter (um)")+
  facet_grid(~JarTrt)+
  theme_classic()
## Error in ggplot(B1, aes(fill = ParentTrt, y = LarvaeDiamum, x = EggDiamum)): object 'B1' not found
#start with a simple linear model 
larv1<- lm(LarvaeDiamum~ParentTrt*JarTrt+EggDiamum, data=B1)
## Error in is.data.frame(data): object 'B1' not found
par(mfcol=c(2,2))
plot(larv1) #does not seem to be normally distributed
## Error in plot(larv1): object 'larv1' not found
par(mfcol=c(1,1))

#plot the residuals of lm1 vs. Female and Male ID
plot(resid(larv1)~B1$FemaleID)
## Error in resid(larv1): object 'larv1' not found
plot(resid(larv1)~B1$MaleID)
## Error in resid(larv1): object 'larv1' not found
#does not seem to be a problem

Move on to next linear model to at least get the code down

B1_2<-lmer(LarvaeDiamum~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID), data=B1)
## Error: 'data' not found, and some variables missing from formula environment
#check assumptions for B1_2
#Assumption 1: linearity: 
plot(resid(B1_2)~B1$ParentTrt)
## Error in resid(B1_2): object 'B1_2' not found
plot(resid(B1_2)~B1$JarTrt) 
## Error in resid(B1_2): object 'B1_2' not found
#seem to meet assumption of linearity

#Assumption 2: Homoscedasticity
plot(B1_2)# seems to meet assumption of homoscedasticity but check with Levene's test: 
## Error in plot(B1_2): object 'B1_2' not found
leveneTest(resid(B1_2)~B1$ParentTrt*B1$JarTrt, center=mean) #p<0.05 so does not meet assumption of homoscedasticity
## Error in resid(B1_2): object 'B1_2' not found
#Assumption 3: residuals are normally distributed
qqnorm(resid(B1_2))
## Error in resid(B1_2): object 'B1_2' not found
qqline(resid(B1_2)) #does not meet assumption of normality
## Error in resid(B1_2): object 'B1_2' not found
#will need to try to transform the data.

B1_2t<-lmer(log(LarvaeDiamum)~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID), data=B1)
## Error: 'data' not found, and some variables missing from formula environment
#check assumptions for B1_2t
#Assumption 1: linearity: 
plot(resid(B1_2t)~B1$ParentTrt)
## Error in resid(B1_2t): object 'B1_2t' not found
plot(resid(B1_2t)~B1$JarTrt) 
## Error in resid(B1_2t): object 'B1_2t' not found
#seem to meet assumption of linearity

#Assumption 2: Homoscedasticity
plot(B1_2t)# seems to meet assumption of homoscedasticity but check with Levene's test: 
## Error in plot(B1_2t): object 'B1_2t' not found
leveneTest(resid(B1_2t)~B1$ParentTrt*B1$JarTrt, center=mean) #p<0.05 still does not meet assumption of homoscedasticity
## Error in resid(B1_2t): object 'B1_2t' not found
#Assumption 3: residuals are normally distributed
qqnorm(resid(B1_2t))
## Error in resid(B1_2t): object 'B1_2t' not found
qqline(resid(B1_2t)) #looks better maybe
## Error in resid(B1_2t): object 'B1_2t' not found
#code for looking at residuals for B1_2t 
plot(resid(B1_2t)~B1$CrossID) # I don't see anything that would suggest we should account for this. 
## Error in resid(B1_2t): object 'B1_2t' not found
abline(0,0)
## Error in int_abline(a = a, b = b, h = h, v = v, untf = untf, ...): plot.new has not been called yet
plot(resid(B1_2t)~B1$JarID)
## Error in resid(B1_2t): object 'B1_2t' not found
abline(0,0)
## Error in int_abline(a = a, b = b, h = h, v = v, untf = untf, ...): plot.new has not been called yet
plot(resid(B1_2t)~B1$TimeFilter) #we may want to account for this
## Error in resid(B1_2t): object 'B1_2t' not found
abline(0,0)
## Error in int_abline(a = a, b = b, h = h, v = v, untf = untf, ...): plot.new has not been called yet
plot(resid(B1_2t)~B1$BlockID.x) #does not seem to need to be accounted for
## Error in resid(B1_2t): object 'B1_2t' not found
#make a new model that accounts for JarID and TimeFilter then check assumptions again
B1_3 <- lmer(LarvaeDiamum~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID) + (1|JarID) + (1|TimeFilter), data=B1)
## Error: 'data' not found, and some variables missing from formula environment
#check assumptions for lm3
#Assumption 1: linearity: 
plot(resid(B1_3)~B1$ParentTrt)
## Error in resid(B1_3): object 'B1_3' not found
plot(resid(B1_3)~B1$JarTrt) 
## Error in resid(B1_3): object 'B1_3' not found
#seem to meet assumption of linearity

#Assumption 2: Homoscedasticity
plot(B1_3)# seems to meet assumption of homoscedasticity but check with Levene's test: 
## Error in plot(B1_3): object 'B1_3' not found
leveneTest(resid(B1_3)~B1$ParentTrt*B1$JarTrt, center=mean) #p<0.05 does not meet assumption of homoscedasticity
## Error in resid(B1_3): object 'B1_3' not found
#Assumption 3: residuals are normally distributed
qqnorm(resid(B1_3))
## Error in resid(B1_3): object 'B1_3' not found
qqline(resid(B1_3)) #still does not meet assumption of normality
## Error in resid(B1_3): object 'B1_3' not found
#try transforming the data again
B1_3t <- lmer(log(LarvaeDiamum)~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID) + (1|JarID) + (1|TimeFilter), data=B1)
## Error: 'data' not found, and some variables missing from formula environment
#check assumptions for lm3
#Assumption 1: linearity: 
plot(resid(B1_3t)~B1$ParentTrt)
## Error in resid(B1_3t): object 'B1_3t' not found
plot(resid(B1_3t)~B1$JarTrt) 
## Error in resid(B1_3t): object 'B1_3t' not found
#seem to meet assumption of linearity

#Assumption 2: Homoscedasticity
plot(B1_3t)# seems to meet assumption of homoscedasticity but check with Levene's test: 
## Error in plot(B1_3t): object 'B1_3t' not found
leveneTest(resid(B1_3t)~B1$ParentTrt*B1$JarTrt, center=mean) #p<0.05 still does not meet assumption of homoscedasticity
## Error in resid(B1_3t): object 'B1_3t' not found
#Assumption 3: residuals are normally distributed
qqnorm(resid(B1_3t))
## Error in resid(B1_3t): object 'B1_3t' not found
qqline(resid(B1_3t)) #still doesn't meet assumption of normality
## Error in resid(B1_3t): object 'B1_3t' not found

I want to try to do the stats with the pH values from the jars. I don’t have the pH values for the adult tank water chem yet

B1tlm <- lmer(log(LarvaeDiamum)~ParentTrt*JarpH + (1|FemaleID)+(1|MaleID) + (1|JarID) + (1|TimeFilter), data=B1)
## Error: 'data' not found, and some variables missing from formula environment
#check assumptions for lm3
#Assumption 1: linearity: 
plot(resid(B1tlm)~B1$ParentTrt)
## Error in resid(B1tlm): object 'B1tlm' not found
plot(resid(B1tlm)~B1$JarTrt) 
## Error in resid(B1tlm): object 'B1tlm' not found
#seem to meet assumption of linearity

#Assumption 2: Homoscedasticity
plot(B1tlm)# seems to meet assumption of homoscedasticity but check with Levene's test: 
## Error in plot(B1tlm): object 'B1tlm' not found
leveneTest(resid(B1tlm)~B1$ParentTrt*B1$JarTrt, center=mean) #p<0.05 still does not meet assumption of homoscedasticity
## Error in resid(B1tlm): object 'B1tlm' not found
#Assumption 3: residuals are normally distributed
qqnorm(resid(B1tlm))
## Error in resid(B1tlm): object 'B1tlm' not found
qqline(resid(B1tlm)) #doesn't meet assumption of normality
## Error in resid(B1tlm): object 'B1tlm' not found

BELOW CODE HASN’T BEEN CHECKED IN MARCH

Subset data by larvae treatment and parent treatment

subset the data into exposed and control parent treatments

exppar <- LarvaeMorph %>% filter(CrossType == “Exposed”) conpar <- LarvaeMorph %>% filter(CrossType == “Control”) #subset the data into exposed and control larvae treatments explar <- LarvaeMorph %>% filter(JarTrt == “Exposed”) conlar <- LarvaeMorph %>% filter(JarTrt == “Control”)

subset the data into exposed and control parent treatments for calcification data

expparcalc <- LarvaeCalc %>% filter(CrossType == “Exposed”) conparcalc <- LarvaeCalc %>% filter(CrossType == “Control”) #subset the data into exposed and control larvae treatments explarcalc <- LarvaeCalc %>% filter(JarTrt == “Exposed”) conlarcalc <- LarvaeCalc %>% filter(JarTrt == “Control”)

Start by looking at the relationship between egg size and larvae size

ggplot(data=LarvaeCalc, aes(x=EggAreauM2, y=SurfaceAreaum2, fill=JarTrt))+ geom_point(aes(shape=CrossType, colour=JarTrt))+ theme_classic()

egglarvsize<- lm(SurfaceAreaum2~EggAreauM2JarTrtCrossType, data=LarvaeCalc) plot(egglarvsize) #looks like it meets assumptions summary(egglarvsize) #egg area is not significant, but JarTrt is p=0.0389

need to explore this further, because if we look at length of the larvae then egg area is not signficant, but if we look at oyster surface area, then egg area is significant.

Look at data that might be count or weight outliers

plot(F1LarvaeCount~PercentFert, data=LarvaeCalc, col=JarTrt) ggplot(data=LarvaeCalc, aes(x=mLJarActual, y=F1LarvaeCount))+ geom_point(aes(shape=JarTrt, colour=PercentFert))

plot(F1TotalLarvaeWtg~F1LarvaeCount, data=LarvaeCalc, col=JarTrt) plot(F1WtPerLarvae~F1LarvaeCount, data=LarvaeCalc, col=JarTrt) text(F1WtPerLarvae~F1LarvaeCount, labels=JarID, data=LarvaeCalc, font=2, cex=.9)

The plots below can be used for all morphology data: length, perimeter, surface area

plot data by jar that are grouped by cross ID; but use JarSeatable instead of JarID so I can plot both exposed and control jars over one another

start with larvae of control parents

ggplot(data = conpar, aes(x = JarSeatable, y = MaxFeretDiamum)) + geom_point(aes(shape=JarTrt, colour=JarTrt))+ labs(x = “Replicate”, y = “Shell Length (um)”, title = “Larvae length control parents”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + scale_shape_manual(name=“Larvae Treatment”, labels=c(“Control”,“OA”), values =c(19,17))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border=element_rect(color=“black”, fill=NA), panel.background = element_rect(colour=“white”), axis.line = element_line(colour = “black”)) + facet_grid(.~CrossID)

plot larvae of exposed parents

ggplot(data = exppar, aes(x = JarSeatable, y = MaxFeretDiamum)) + geom_point(aes(shape=JarTrt, colour=JarTrt))+ labs(x = “Replicate”, y = “Shell Length (um)”, title = “Larvae length exposed parents”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + scale_shape_manual(name=“Larvae Treatment”, labels=c(“Control”,“OA”), values =c(19,17))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border=element_rect(color=“black”, fill=NA), panel.background = element_rect(colour=“white”), axis.line = element_line(colour = “black”)) + facet_grid(.~CrossID)

same plot structure as above, but using boxplots instead of points for each larvae measured

plot larvae of control parents

ggplot(data = conpar, aes(x = JarSeatable, y = MaxFeretDiamum)) + geom_boxplot(aes(colour=JarTrt))+ labs(x = “Replicate”, y = “Shell Length (um)”, title = “Larvae length control parents”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border=element_rect(color=“black”, fill=NA), panel.background = element_rect(colour=“white”), axis.line = element_line(colour = “black”)) + facet_grid(.~CrossID)

plot larvae of exposed parents

ggplot(data = exppar, aes(x = JarSeatable, y = MaxFeretDiamum)) + geom_boxplot(aes(colour=JarTrt))+ labs(x = “Replicate”, y = “Shell Length (um)”, title = “Larvae length exposed parents”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border=element_rect(color=“black”, fill=NA), panel.background = element_rect(colour=“white”), axis.line = element_line(colour = “black”)) + facet_grid(.~CrossID)

Make boxplots by parental cross, plotting both larvae treatments over one another. Can be used for all morphology parameters: surface area, length,

ggplot(data = LarvaeMorph, aes(x = CrossID, y = MaxFeretDiamum)) + geom_boxplot(aes(colour=JarTrt)) + labs(x = “Parental Cross”, y = “Shell Length (um)”, title = “Larvae lengths”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”))+ scale_fill_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + scale_y_continuous(breaks = seq(floor(min(LarvaeMorph\(MaxFeretDiamum)), ceiling(max(LarvaeMorph\)MaxFeretDiamum)), 5)) +

ggplot(data = colar, aes(x = CrossID, y = MaxFeretDiamum, fill = ParTrt)) + geom_boxplot() + labs(x = “Parental Cross”, y = “Shell Length (um)”, title = “Control Larvae”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_fill_manual(name = “Parental Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + scale_y_continuous(breaks = seq(floor(min(larvmorph\(MaxFeretDiamum)), ceiling(max(larvmorph\)MaxFeretDiamum)), 5), limits = c(floor(min(larvmorph\(MaxFeretDiamum)), ceiling(max(larvmorph\)MaxFeretDiamum)))) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = “black”))

ggplot(data = exlar, aes(x = CrossID, y = MaxFeretDiamum, fill = ParTrt)) + geom_boxplot() + labs(x = “Parental Cross”, y = “Shell Length (um)”, title = “OA Larvae”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_fill_manual(name = “Parental Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + scale_y_continuous(breaks = seq(floor(min(larvmorph\(MaxFeretDiamum)), ceiling(max(larvmorph\)MaxFeretDiamum)), 5), limits = c(floor(min(larvmorph\(MaxFeretDiamum)), ceiling(max(larvmorph\)MaxFeretDiamum)))) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = “black”), panel.border=element_rect(color=“black”,fill=NA))+ facet_grid(.~CrossType, scale=“free_x”)

Plot calcification data

plot data by jar that are grouped by cross ID; but use JarSeatable instead of JarID so I can plot both exposed and control jars over one another

start with larvae of control parents

ggplot(data = conparcalc, aes(x = JarID, y = F1WtPerLarvae)) + geom_point(aes(shape=JarTrt, colour=JarTrt))+ labs(x = “Replicate”, y = “F1 larvae count”, title = “Larvae count control parents”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + scale_shape_manual(name=“Larvae Treatment”, labels=c(“Control”,“OA”), values =c(19,17))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border=element_rect(color=“black”, fill=NA), panel.background = element_rect(colour=“white”), axis.line = element_line(colour = “black”)) + facet_grid(.~CrossID, scale=“free_x”)

plot larvae of exposed parents

ggplot(data = expparcalc, aes(x = JarSeatable, y = F1WtPerLarvae)) + geom_point(aes(shape=JarTrt, colour=JarTrt))+ labs(x = “Replicate”, y = “F1 weight per larvae”, title = “Larvae weight exposed parents”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + scale_shape_manual(name=“Larvae Treatment”, labels=c(“Control”,“OA”), values =c(19,17))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border=element_rect(color=“black”, fill=NA), panel.background = element_rect(colour=“white”), axis.line = element_line(colour = “black”)) + facet_grid(.~CrossID)

same plot structure as above, but using boxplots instead of points for each cross measured

plot larvae of control parents

ggplot(data = conparcalc, aes(x = CrossID, y = F1WtPerLarvae)) + geom_boxplot(aes(colour=JarTrt))+ labs(x = “CrossID”, y = “F1 weight per larvae”, title = “Larvae weight control parents”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border=element_rect(color=“black”, fill=NA), panel.background = element_rect(colour=“white”), axis.line = element_line(colour = “black”))

plot larvae of exposed parents

ggplot(data = expparcalc, aes(x = CrossID, y = F1WtPerLarvae)) + geom_boxplot(aes(colour=JarTrt))+ labs(x = “CrossID”, y = “F1 weight per larvae”, title = “Larvae weight control parents”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border=element_rect(color=“black”, fill=NA), panel.background = element_rect(colour=“white”), axis.line = element_line(colour = “black”))

Make boxplots by parental cross, plotting both larvae treatments over one another.For calcification data; can also be used for cilia counts

ggplot(data = LarvaeCalc, aes(x = CrossID, y = F1WtPerLarvae)) + geom_boxplot(aes(colour=JarTrt)) + labs(x = “Parental Cross”, y = “F1 Weight per larvae (g)”, title = “Larvae weights”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”))+ scale_fill_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = “black”), panel.border=element_rect(color=“black”,fill=NA))+ facet_grid(.~CrossType, scale=“free_x”)

Plots of difference between parental treatments for morphology

crossdiff <- data.frame(tapply(LarvaeMorph\(MaxFeretDiamum, list(LarvaeMorph\)CrossID, LarvaeMorph$JarTrt), median)) %>% rownames_to_column(“CrossID”) %>% mutate(CrossTemp = CrossID) %>% mutate(ParentTrt = ifelse(startsWith(CrossID, “E”), “Exposed”, “Control”)) %>% separate(CrossTemp, c(“FemaleID”, “MaleID”, “BlockID”, “Fourth”), sep=“_“) %>% select(-Fourth) %>% mutate(Diff = Exposed - Control)

crossdiff\(CrossID<- as.factor(crossdiff\)CrossID) crossdiff\(FemaleID<-as.factor(crossdiff\)FemaleID) crossdiff\(MaleID<-as.factor(crossdiff\)MaleID)

look at cross differences by Male and Female ID

ggplot(data = crossdiff, aes(x = MaleID, y = Diff)) + geom_point(aes(shape=ParentTrt, colour=ParentTrt)) + labs(x = “Male ID”, y = “Change in median shell length (um) per familycontrol to OA conditions”) + scale_colour_manual(name=“Parent treatment”, labels= c(“control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + scale_shape_manual(name=“Parent treatment”, labels= c(“control”, “OA”), values = c(19, 17))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = “black”), panel.border=element_rect(colour=“black”, fill=NA))+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5))+ facet_grid(.~FemaleID, scale=“free_x”)

look at overall differences by parent treatment

ggplot(data = crossdiff, aes(x = ParentTrt, y = Diff, fill = ParentTrt)) + geom_boxplot() + labs(x = “Parental Treatment”, y = “Change in median shell length (um) per familycontrol to OA conditions”) + scale_x_discrete(breaks = c(“Control”, “Exposed”), labels = c(“Control”, “OA”)) + scale_fill_manual(values = c(“#00BFC4”, “#F8766D”)) + theme(legend.position = “none”, panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = “black”))

get mean of the parent treatments to make a bargraph

diffmean<- aggregate(Diff~ParentTrt, data=crossdiff, FUN=mean) diffse<- aggregate(Diff~ParentTrt, data=crossdiff, FUN=se)

make a barplot

ggplot(data = diffmean, aes(x = ParentTrt, y = Diff, fill=“grey67”)) + geom_bar(stat=“identity”, position=“dodge”,aes(fill=“grey67”))+ scale_fill_manual(values=“grey67”)+ labs(x = “Parent Treatment”, y = “Change in Length”) + geom_errorbar(aes(ymin= diffmean\(Diff-diffse\)Diff, ymax=diffmean\(Diff+diffse\)Diff),width=0.2, position=position_dodge(.9))+ theme_classic()

Look at differences for calcification

crossdiffcalc <- data.frame(tapply(LarvaeCalc\(F1WtPerLarvae, list(LarvaeCalc\)CrossID, LarvaeCalc$JarTrt), median)) %>% rownames_to_column(“CrossID”) %>% mutate(CrossTemp = CrossID) %>% mutate(ParentTrt = ifelse(startsWith(CrossID, “E”), “Exposed”, “Control”)) %>% separate(CrossTemp, c(“FemaleID”, “MaleID”, “BlockID”, “Fourth”), sep=“_“) %>% select(-Fourth) %>% mutate(Diff = Exposed - Control)

crossdiffcalc\(CrossID<- as.factor(crossdiffcalc\)CrossID) crossdiffcalc\(FemaleID<-as.factor(crossdiffcalc\)FemaleID) crossdiffcalc\(MaleID<-as.factor(crossdiffcalc\)MaleID)

look at cross differences by Male and Female ID

ggplot(data = crossdiffcalc, aes(x = FemaleID, y = Diff)) + geom_point(aes(shape=ParentTrt, colour=ParentTrt)) + labs(x = “Female ID”, y = “Change in median weight per familycontrol to OA conditions”) + scale_colour_manual(name=“Parent treatment”, labels= c(“control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + scale_shape_manual(name=“Parent treatment”, labels= c(“control”, “OA”), values = c(19, 17))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = “black”))+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5))+ facet_grid(.~MaleID, scale=“free_x”)

look at overall differences by parent treatment

ggplot(data = crossdiffcalc, aes(x = ParentTrt, y = Diff, fill = ParentTrt)) + geom_boxplot() + labs(x = “Parental Treatment”, y = “Change in median larvae weight per familycontrol to OA conditions”) + scale_x_discrete(breaks = c(“Control”, “Exposed”), labels = c(“Control”, “OA”)) + scale_fill_manual(values = c(“#00BFC4”, “#F8766D”)) + theme(legend.position = “none”, panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = “black”))

Make plot that looks at calcification and cilia by larvae and parent treatment

par(mar = c(5, 4, 0.5, 0.5), bg = “transparent”) boxplot(LarvaeMorph\(PercentCilia ~ LarvaeMorph\)JarTrt * LarvaeMorph\(CrossType, xlim = c(0, 5), xlab = "Larvae Treatment", ylab = "Percent Cilia", names = c("Control", "OA", "Control", "OA"), col = c("powderblue", "red")) rect(xleft = -0.2, ybottom = 0, xright = 2.5, ytop = 1, border = FALSE, col = rgb(0, 0, 0.5, alpha = 0.1)) rect(xleft = 2.5, ybottom = 0, xright = 5.2, ytop = 1, border = FALSE, col = rgb(0.5, 0, 0, alpha = 0.1)) text(x = 1.25 , y = 0.000007, "Control Parents") text(x = 3.75, y = 0.000007, "OA Parents") boxplot(LarvaeMorph\)PercentCilia ~ LarvaeMorph\(JarTrt * LarvaeMorph\)CrossType, xlim = c(0, 5), xlab = “Larvae Treatment”, ylab = “”, names = c(“Control”, “OA”, “Control”, “OA”), col = c(“powderblue”, “red”), add = TRUE)

```

get plots for means of parent and larvae treatment; use LarvaeCalc dataset because it only has one value per jar

get the data as means for treatments

meancil<- aggregate(PercentCilia~CrossTypeJarTrt, data=LarvaeCalc, FUN= mean) secil<- aggregate(PercentCilia~CrossTypeJarTrt, data=LarvaeCalc, FUN= se) #plot the data as bargraphs with ggplot ggplot(data = meancil, aes(x = CrossType, y = PercentCilia, fill=JarTrt)) + geom_bar(stat=“identity”, aes(fill=JarTrt), position=“dodge”)+ labs(x = “Parent Treatment”, y = “Proportion Cilia Extruded”) + scale_fill_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“blue4”, “red2”)) + geom_errorbar(aes(ymin= meancil\(PercentCilia-secil\)PercentCilia, ymax=meancil\(PercentCilia+secil\)PercentCilia),width=0.2, position=position_dodge(.9))+ theme_classic()

look at the data within each group

Histdata<-unite(LarvaeCalc, Group, c(CrossType, JarTrt), sep=“_“) ggplot(Histdata, aes(x=PercentCilia)) + geom_histogram()+ facet_grid(~Group) #run analysis to look for significance aov1<- aov(PercentCilia~CrossType*JarTrt, data=LarvaeCalc) summary(aov1) shapiro.test(aov1$residuals) #does not meet assumption leveneTest(aov1)# meets assumption

since the proportion data do not meet the assumption of normality, try a transformation